{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use Soft Q Learning to Play LunarLander-v2\n",
    "\n",
    "PyTorch version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "import logging\n",
    "import itertools\n",
    "import copy\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "torch.manual_seed(0)\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import torch.distributions as distributions\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "15:30:04 [INFO] env: <LunarLander<LunarLander-v2>>\n",
      "15:30:04 [INFO] action_space: Discrete(4)\n",
      "15:30:04 [INFO] observation_space: Box(-inf, inf, (8,), float32)\n",
      "15:30:04 [INFO] reward_range: (-inf, inf)\n",
      "15:30:04 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}\n",
      "15:30:04 [INFO] _max_episode_steps: 1000\n",
      "15:30:04 [INFO] _elapsed_steps: None\n",
      "15:30:04 [INFO] id: LunarLander-v2\n",
      "15:30:04 [INFO] entry_point: gym.envs.box2d:LunarLander\n",
      "15:30:04 [INFO] reward_threshold: 200\n",
      "15:30:04 [INFO] nondeterministic: False\n",
      "15:30:04 [INFO] max_episode_steps: 1000\n",
      "15:30:04 [INFO] _kwargs: {}\n",
      "15:30:04 [INFO] _env_name: LunarLander\n"
     ]
    }
   ],
   "source": [
    "env = gym.make('LunarLander-v2')\n",
    "env.seed(0)\n",
    "for key in vars(env):\n",
    "    logging.info('%s: %s', key, vars(env)[key])\n",
    "for key in vars(env.spec):\n",
    "    logging.info('%s: %s', key, vars(env.spec)[key])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNReplayer:\n",
    "    def __init__(self, capacity):\n",
    "        self.memory = pd.DataFrame(index=range(capacity),\n",
    "                columns=['state', 'action', 'reward', 'next_state', 'done'])\n",
    "        self.i = 0\n",
    "        self.count = 0\n",
    "        self.capacity = capacity\n",
    "\n",
    "    def store(self, *args):\n",
    "        self.memory.loc[self.i] = args\n",
    "        self.i = (self.i + 1) % self.capacity\n",
    "        self.count = min(self.count + 1, self.capacity)\n",
    "\n",
    "    def sample(self, size):\n",
    "        indices = np.random.choice(self.count, size=size)\n",
    "        return (np.stack(self.memory.loc[indices, field]) for field in\n",
    "                self.memory.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SQLAgent:\n",
    "    def __init__(self, env):\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = 0.99\n",
    "\n",
    "        self.replayer = DQNReplayer(10000)\n",
    "\n",
    "        self.alpha = 0.02\n",
    "\n",
    "        self.evaluate_net = self.build_net(\n",
    "                input_size=env.observation_space.shape[0],\n",
    "                hidden_sizes=[256, 256], output_size=self.action_n)\n",
    "        self.optimizer = optim.Adam(self.evaluate_net.parameters(), lr=3e-4)\n",
    "        self.loss = nn.MSELoss()\n",
    "\n",
    "    def build_net(self, input_size, hidden_sizes, output_size):\n",
    "        layers = []\n",
    "        for input_size, output_size in zip(\n",
    "                [input_size,] + hidden_sizes, hidden_sizes + [output_size,]):\n",
    "            layers.append(nn.Linear(input_size, output_size))\n",
    "            layers.append(nn.ReLU())\n",
    "        layers = layers[:-1]\n",
    "        model = nn.Sequential(*layers)\n",
    "        return model\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        self.mode = mode\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory = []\n",
    "            self.target_net = copy.deepcopy(self.evaluate_net)\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        state_tensor = torch.as_tensor(observation,\n",
    "                dtype=torch.float).squeeze(0)\n",
    "        q_div_alpha_tensor = self.evaluate_net(state_tensor) / self.alpha\n",
    "        v_div_alpha_tensor = torch.logsumexp(q_div_alpha_tensor, dim=-1,\n",
    "                keepdim=True)\n",
    "        prob_tensor = (q_div_alpha_tensor - v_div_alpha_tensor).exp()\n",
    "        action_tensor = distributions.Categorical(prob_tensor).sample()\n",
    "        action = action_tensor.item()\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "            if len(self.trajectory) >= 8:\n",
    "                state, _, _, act, next_state, reward, done, _ = \\\n",
    "                        self.trajectory[-8:]\n",
    "                self.replayer.store(state, act, reward, next_state, done)\n",
    "            if self.replayer.count >= 500:\n",
    "                self.learn()\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        pass\n",
    "\n",
    "    def learn(self):\n",
    "        # replay\n",
    "        states, actions, rewards, next_states, dones = \\\n",
    "                self.replayer.sample(128) # replay transitions\n",
    "        state_tensor = torch.as_tensor(states, dtype=torch.float)\n",
    "        action_tensor = torch.as_tensor(actions, dtype=torch.long)\n",
    "        reward_tensor = torch.as_tensor(rewards, dtype=torch.float)\n",
    "        next_state_tensor = torch.as_tensor(next_states, dtype=torch.float)\n",
    "        done_tensor = torch.as_tensor(dones, dtype=torch.float)\n",
    "\n",
    "        # train\n",
    "        next_q_tensor = self.target_net(next_state_tensor)\n",
    "        next_v_tensor = self.alpha * torch.logsumexp(next_q_tensor / self.alpha, dim=-1)\n",
    "        target_tensor = reward_tensor + self.gamma * (1. - done_tensor) * next_v_tensor\n",
    "        pred_tensor = self.evaluate_net(state_tensor)\n",
    "        q_tensor = pred_tensor.gather(1, action_tensor.unsqueeze(1)).squeeze(1)\n",
    "        loss_tensor = self.loss(q_tensor, target_tensor.detach())\n",
    "        self.optimizer.zero_grad()\n",
    "        loss_tensor.backward()\n",
    "        self.optimizer.step()\n",
    "\n",
    "\n",
    "agent = SQLAgent(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "15:30:04 [INFO] ==== train ====\n",
      "15:30:04 [DEBUG] train episode 0: reward = -461.18, steps = 83\n",
      "15:30:04 [DEBUG] train episode 1: reward = -668.99, steps = 69\n",
      "15:30:04 [DEBUG] train episode 2: reward = -458.32, steps = 73\n",
      "15:30:04 [DEBUG] train episode 3: reward = -493.66, steps = 56\n",
      "15:30:04 [DEBUG] train episode 4: reward = -546.61, steps = 76\n",
      "15:30:04 [DEBUG] train episode 5: reward = -592.39, steps = 75\n",
      "15:30:04 [DEBUG] train episode 6: reward = -591.07, steps = 68\n",
      "15:30:05 [DEBUG] train episode 7: reward = 10.01, steps = 58\n",
      "15:30:06 [DEBUG] train episode 8: reward = -101.41, steps = 53\n",
      "15:30:06 [DEBUG] train episode 9: reward = -184.76, steps = 77\n",
      "15:30:07 [DEBUG] train episode 10: reward = -127.31, steps = 61\n",
      "15:30:08 [DEBUG] train episode 11: reward = -162.83, steps = 71\n",
      "15:30:09 [DEBUG] train episode 12: reward = -151.20, steps = 79\n",
      "15:30:09 [DEBUG] train episode 13: reward = -142.71, steps = 80\n",
      "15:30:10 [DEBUG] train episode 14: reward = -159.48, steps = 66\n",
      "15:30:11 [DEBUG] train episode 15: reward = -109.68, steps = 77\n",
      "15:30:12 [DEBUG] train episode 16: reward = -162.97, steps = 68\n",
      "15:30:13 [DEBUG] train episode 17: reward = -78.89, steps = 55\n",
      "15:30:14 [DEBUG] train episode 18: reward = -856.41, steps = 154\n",
      "15:30:16 [DEBUG] train episode 19: reward = -717.81, steps = 110\n",
      "15:30:16 [DEBUG] train episode 20: reward = -109.94, steps = 63\n",
      "15:30:17 [DEBUG] train episode 21: reward = -124.91, steps = 61\n",
      "15:30:18 [DEBUG] train episode 22: reward = -260.72, steps = 100\n",
      "15:30:19 [DEBUG] train episode 23: reward = -118.52, steps = 50\n",
      "15:30:20 [DEBUG] train episode 24: reward = -44.96, steps = 89\n",
      "15:30:21 [DEBUG] train episode 25: reward = -84.43, steps = 56\n",
      "15:30:22 [DEBUG] train episode 26: reward = -74.78, steps = 87\n",
      "15:30:24 [DEBUG] train episode 27: reward = -382.92, steps = 175\n",
      "15:30:25 [DEBUG] train episode 28: reward = -179.34, steps = 84\n",
      "15:30:26 [DEBUG] train episode 29: reward = -59.86, steps = 56\n",
      "15:30:27 [DEBUG] train episode 30: reward = -65.31, steps = 94\n",
      "15:30:28 [DEBUG] train episode 31: reward = -49.00, steps = 75\n",
      "15:30:29 [DEBUG] train episode 32: reward = 42.47, steps = 88\n",
      "15:30:30 [DEBUG] train episode 33: reward = -196.26, steps = 91\n",
      "15:30:32 [DEBUG] train episode 34: reward = -1080.69, steps = 159\n",
      "15:30:35 [DEBUG] train episode 35: reward = -346.48, steps = 190\n",
      "15:30:37 [DEBUG] train episode 36: reward = -272.09, steps = 217\n",
      "15:30:38 [DEBUG] train episode 37: reward = -108.51, steps = 102\n",
      "15:30:39 [DEBUG] train episode 38: reward = -422.14, steps = 80\n",
      "15:30:40 [DEBUG] train episode 39: reward = -469.50, steps = 95\n",
      "15:30:42 [DEBUG] train episode 40: reward = -349.59, steps = 190\n",
      "15:30:43 [DEBUG] train episode 41: reward = -212.93, steps = 104\n",
      "15:30:45 [DEBUG] train episode 42: reward = -257.79, steps = 128\n",
      "15:30:47 [DEBUG] train episode 43: reward = -293.18, steps = 215\n",
      "15:30:49 [DEBUG] train episode 44: reward = -199.20, steps = 150\n",
      "15:30:51 [DEBUG] train episode 45: reward = -223.86, steps = 158\n",
      "15:30:52 [DEBUG] train episode 46: reward = -204.57, steps = 134\n",
      "15:30:54 [DEBUG] train episode 47: reward = -198.53, steps = 174\n",
      "15:30:56 [DEBUG] train episode 48: reward = -358.08, steps = 155\n",
      "15:30:58 [DEBUG] train episode 49: reward = -216.80, steps = 155\n",
      "15:30:59 [DEBUG] train episode 50: reward = -131.84, steps = 171\n",
      "15:31:01 [DEBUG] train episode 51: reward = -224.78, steps = 128\n",
      "15:31:03 [DEBUG] train episode 52: reward = -138.28, steps = 160\n",
      "15:31:04 [DEBUG] train episode 53: reward = -94.56, steps = 150\n",
      "15:31:06 [DEBUG] train episode 54: reward = -73.09, steps = 156\n",
      "15:31:09 [DEBUG] train episode 55: reward = -381.66, steps = 242\n",
      "15:31:10 [DEBUG] train episode 56: reward = -280.63, steps = 149\n",
      "15:31:13 [DEBUG] train episode 57: reward = -262.54, steps = 236\n",
      "15:31:15 [DEBUG] train episode 58: reward = -494.19, steps = 168\n",
      "15:31:17 [DEBUG] train episode 59: reward = -168.34, steps = 149\n",
      "15:31:19 [DEBUG] train episode 60: reward = -319.37, steps = 232\n",
      "15:31:21 [DEBUG] train episode 61: reward = -172.28, steps = 137\n",
      "15:31:23 [DEBUG] train episode 62: reward = -217.15, steps = 143\n",
      "15:31:24 [DEBUG] train episode 63: reward = -216.08, steps = 154\n",
      "15:31:30 [DEBUG] train episode 64: reward = -192.72, steps = 434\n",
      "15:31:32 [DEBUG] train episode 65: reward = -186.88, steps = 149\n",
      "15:31:33 [DEBUG] train episode 66: reward = -183.78, steps = 139\n",
      "15:31:35 [DEBUG] train episode 67: reward = -352.77, steps = 167\n",
      "15:31:37 [DEBUG] train episode 68: reward = 15.43, steps = 149\n",
      "15:31:38 [DEBUG] train episode 69: reward = -253.02, steps = 124\n",
      "15:31:42 [DEBUG] train episode 70: reward = -259.30, steps = 325\n",
      "15:31:44 [DEBUG] train episode 71: reward = -118.72, steps = 189\n",
      "15:31:47 [DEBUG] train episode 72: reward = -279.75, steps = 231\n",
      "15:31:50 [DEBUG] train episode 73: reward = -291.15, steps = 217\n",
      "15:31:51 [DEBUG] train episode 74: reward = -136.00, steps = 140\n",
      "15:31:53 [DEBUG] train episode 75: reward = -189.93, steps = 153\n",
      "15:31:55 [DEBUG] train episode 76: reward = -196.52, steps = 154\n",
      "15:31:56 [DEBUG] train episode 77: reward = -101.84, steps = 160\n",
      "15:31:58 [DEBUG] train episode 78: reward = -180.87, steps = 146\n",
      "15:32:02 [DEBUG] train episode 79: reward = -271.79, steps = 283\n",
      "15:32:05 [DEBUG] train episode 80: reward = -444.22, steps = 248\n",
      "15:32:10 [DEBUG] train episode 81: reward = -197.07, steps = 343\n",
      "15:32:12 [DEBUG] train episode 82: reward = -102.25, steps = 140\n",
      "15:32:17 [DEBUG] train episode 83: reward = -220.68, steps = 253\n",
      "15:32:18 [DEBUG] train episode 84: reward = -187.62, steps = 101\n",
      "15:32:21 [DEBUG] train episode 85: reward = -233.20, steps = 206\n",
      "15:32:26 [DEBUG] train episode 86: reward = -266.51, steps = 369\n",
      "15:32:31 [DEBUG] train episode 87: reward = -232.73, steps = 340\n",
      "15:32:35 [DEBUG] train episode 88: reward = -165.14, steps = 301\n",
      "15:32:52 [DEBUG] train episode 89: reward = -167.00, steps = 1000\n",
      "15:32:56 [DEBUG] train episode 90: reward = -237.51, steps = 219\n",
      "15:33:13 [DEBUG] train episode 91: reward = -201.66, steps = 1000\n",
      "15:33:15 [DEBUG] train episode 92: reward = -180.10, steps = 188\n",
      "15:33:31 [DEBUG] train episode 93: reward = -148.77, steps = 1000\n",
      "15:33:47 [DEBUG] train episode 94: reward = -150.70, steps = 1000\n",
      "15:34:02 [DEBUG] train episode 95: reward = -181.40, steps = 1000\n",
      "15:34:04 [DEBUG] train episode 96: reward = -100.86, steps = 141\n",
      "15:34:08 [DEBUG] train episode 97: reward = -154.23, steps = 246\n",
      "15:34:23 [DEBUG] train episode 98: reward = -161.37, steps = 1000\n",
      "15:34:24 [DEBUG] train episode 99: reward = -291.60, steps = 95\n",
      "15:34:40 [DEBUG] train episode 100: reward = -149.28, steps = 1000\n",
      "15:34:56 [DEBUG] train episode 101: reward = -140.16, steps = 1000\n",
      "15:35:12 [DEBUG] train episode 102: reward = -201.76, steps = 1000\n",
      "15:35:29 [DEBUG] train episode 103: reward = -156.56, steps = 1000\n",
      "15:35:47 [DEBUG] train episode 104: reward = -130.02, steps = 1000\n",
      "15:36:03 [DEBUG] train episode 105: reward = -165.13, steps = 1000\n",
      "15:36:21 [DEBUG] train episode 106: reward = -209.76, steps = 1000\n",
      "15:36:36 [DEBUG] train episode 107: reward = -161.81, steps = 1000\n",
      "15:36:37 [DEBUG] train episode 108: reward = -145.19, steps = 83\n",
      "15:36:52 [DEBUG] train episode 109: reward = -179.72, steps = 1000\n",
      "15:37:07 [DEBUG] train episode 110: reward = -192.56, steps = 1000\n",
      "15:37:22 [DEBUG] train episode 111: reward = -202.50, steps = 1000\n",
      "15:37:38 [DEBUG] train episode 112: reward = -167.27, steps = 1000\n",
      "15:37:56 [DEBUG] train episode 113: reward = -178.42, steps = 1000\n",
      "15:38:11 [DEBUG] train episode 114: reward = -175.40, steps = 1000\n",
      "15:38:26 [DEBUG] train episode 115: reward = -205.94, steps = 1000\n",
      "15:38:42 [DEBUG] train episode 116: reward = -165.85, steps = 1000\n",
      "15:38:58 [DEBUG] train episode 117: reward = -183.95, steps = 1000\n",
      "15:39:14 [DEBUG] train episode 118: reward = -201.14, steps = 1000\n",
      "15:39:30 [DEBUG] train episode 119: reward = -155.56, steps = 1000\n",
      "15:39:45 [DEBUG] train episode 120: reward = -139.84, steps = 1000\n",
      "15:40:02 [DEBUG] train episode 121: reward = -141.59, steps = 1000\n",
      "15:40:18 [DEBUG] train episode 122: reward = -167.62, steps = 1000\n",
      "15:40:34 [DEBUG] train episode 123: reward = -148.34, steps = 1000\n",
      "15:40:48 [DEBUG] train episode 124: reward = -98.38, steps = 1000\n",
      "15:41:06 [DEBUG] train episode 125: reward = -173.27, steps = 1000\n",
      "15:41:22 [DEBUG] train episode 126: reward = -134.38, steps = 1000\n",
      "15:41:39 [DEBUG] train episode 127: reward = -123.90, steps = 1000\n",
      "15:41:41 [DEBUG] train episode 128: reward = -260.99, steps = 135\n",
      "15:41:58 [DEBUG] train episode 129: reward = -205.37, steps = 1000\n",
      "15:42:15 [DEBUG] train episode 130: reward = -161.72, steps = 1000\n",
      "15:42:31 [DEBUG] train episode 131: reward = -171.49, steps = 1000\n",
      "15:42:47 [DEBUG] train episode 132: reward = -145.85, steps = 1000\n",
      "15:43:03 [DEBUG] train episode 133: reward = -140.45, steps = 1000\n",
      "15:43:18 [DEBUG] train episode 134: reward = -144.73, steps = 1000\n",
      "15:43:37 [DEBUG] train episode 135: reward = -145.92, steps = 1000\n",
      "15:43:54 [DEBUG] train episode 136: reward = -131.38, steps = 1000\n",
      "15:44:13 [DEBUG] train episode 137: reward = -159.54, steps = 1000\n",
      "15:44:30 [DEBUG] train episode 138: reward = -164.33, steps = 1000\n",
      "15:44:51 [DEBUG] train episode 139: reward = -151.81, steps = 1000\n",
      "15:45:10 [DEBUG] train episode 140: reward = -159.38, steps = 1000\n",
      "15:45:29 [DEBUG] train episode 141: reward = -119.14, steps = 1000\n",
      "15:45:46 [DEBUG] train episode 142: reward = -149.16, steps = 1000\n",
      "15:46:03 [DEBUG] train episode 143: reward = -134.57, steps = 1000\n",
      "15:46:08 [DEBUG] train episode 144: reward = -172.85, steps = 329\n",
      "15:46:27 [DEBUG] train episode 145: reward = -146.75, steps = 1000\n",
      "15:46:40 [DEBUG] train episode 146: reward = -228.53, steps = 671\n",
      "15:46:57 [DEBUG] train episode 147: reward = -133.78, steps = 1000\n",
      "15:47:17 [DEBUG] train episode 148: reward = -170.46, steps = 1000\n",
      "15:47:34 [DEBUG] train episode 149: reward = -132.13, steps = 1000\n",
      "15:47:51 [DEBUG] train episode 150: reward = -155.12, steps = 1000\n",
      "15:48:09 [DEBUG] train episode 151: reward = -115.96, steps = 1000\n",
      "15:48:26 [DEBUG] train episode 152: reward = -128.12, steps = 1000\n",
      "15:48:46 [DEBUG] train episode 153: reward = -136.80, steps = 1000\n",
      "15:49:06 [DEBUG] train episode 154: reward = -130.60, steps = 1000\n",
      "15:49:24 [DEBUG] train episode 155: reward = -104.23, steps = 1000\n",
      "15:49:51 [DEBUG] train episode 156: reward = -154.83, steps = 1000\n",
      "15:50:10 [DEBUG] train episode 157: reward = -132.27, steps = 1000\n",
      "15:50:14 [DEBUG] train episode 158: reward = -141.06, steps = 257\n",
      "15:50:30 [DEBUG] train episode 159: reward = -129.47, steps = 1000\n",
      "15:50:31 [DEBUG] train episode 160: reward = -299.50, steps = 80\n",
      "15:50:48 [DEBUG] train episode 161: reward = -140.84, steps = 1000\n",
      "15:51:00 [DEBUG] train episode 162: reward = -244.77, steps = 816\n",
      "15:51:16 [DEBUG] train episode 163: reward = -91.54, steps = 1000\n",
      "15:51:28 [DEBUG] train episode 164: reward = -195.96, steps = 743\n",
      "15:51:30 [DEBUG] train episode 165: reward = -124.06, steps = 139\n",
      "15:51:46 [DEBUG] train episode 166: reward = -132.26, steps = 1000\n",
      "15:51:50 [DEBUG] train episode 167: reward = -77.08, steps = 289\n",
      "15:51:53 [DEBUG] train episode 168: reward = -75.81, steps = 231\n",
      "15:52:08 [DEBUG] train episode 169: reward = -118.95, steps = 1000\n",
      "15:52:15 [DEBUG] train episode 170: reward = -234.10, steps = 453\n",
      "15:52:32 [DEBUG] train episode 171: reward = -128.45, steps = 1000\n",
      "15:52:41 [DEBUG] train episode 172: reward = -331.69, steps = 600\n",
      "15:52:58 [DEBUG] train episode 173: reward = -73.57, steps = 1000\n",
      "15:53:15 [DEBUG] train episode 174: reward = -114.12, steps = 1000\n",
      "15:53:32 [DEBUG] train episode 175: reward = -160.87, steps = 1000\n",
      "15:53:50 [DEBUG] train episode 176: reward = -134.43, steps = 1000\n",
      "15:54:06 [DEBUG] train episode 177: reward = -116.93, steps = 1000\n",
      "15:54:22 [DEBUG] train episode 178: reward = -178.51, steps = 1000\n",
      "15:54:39 [DEBUG] train episode 179: reward = -84.95, steps = 1000\n",
      "15:54:54 [DEBUG] train episode 180: reward = -117.83, steps = 1000\n",
      "15:55:11 [DEBUG] train episode 181: reward = -127.32, steps = 1000\n",
      "15:55:26 [DEBUG] train episode 182: reward = -105.13, steps = 1000\n",
      "15:55:43 [DEBUG] train episode 183: reward = -117.50, steps = 1000\n",
      "15:55:58 [DEBUG] train episode 184: reward = -107.08, steps = 1000\n",
      "15:56:16 [DEBUG] train episode 185: reward = -111.60, steps = 1000\n",
      "15:56:32 [DEBUG] train episode 186: reward = -101.00, steps = 1000\n",
      "15:56:48 [DEBUG] train episode 187: reward = -106.90, steps = 1000\n",
      "15:57:04 [DEBUG] train episode 188: reward = -116.72, steps = 1000\n",
      "15:57:21 [DEBUG] train episode 189: reward = -145.83, steps = 1000\n",
      "15:57:38 [DEBUG] train episode 190: reward = -94.56, steps = 1000\n",
      "15:57:56 [DEBUG] train episode 191: reward = -158.11, steps = 1000\n",
      "15:58:14 [DEBUG] train episode 192: reward = -159.91, steps = 1000\n",
      "15:58:32 [DEBUG] train episode 193: reward = -119.21, steps = 1000\n",
      "15:58:47 [DEBUG] train episode 194: reward = -115.40, steps = 1000\n",
      "15:59:04 [DEBUG] train episode 195: reward = -68.14, steps = 1000\n",
      "15:59:23 [DEBUG] train episode 196: reward = -132.07, steps = 1000\n",
      "15:59:40 [DEBUG] train episode 197: reward = -144.48, steps = 1000\n",
      "15:59:57 [DEBUG] train episode 198: reward = -116.21, steps = 1000\n",
      "15:59:58 [DEBUG] train episode 199: reward = -102.59, steps = 58\n",
      "16:00:14 [DEBUG] train episode 200: reward = -103.85, steps = 1000\n",
      "16:00:31 [DEBUG] train episode 201: reward = -131.40, steps = 1000\n",
      "16:00:52 [DEBUG] train episode 202: reward = -70.19, steps = 1000\n",
      "16:01:09 [DEBUG] train episode 203: reward = -142.31, steps = 1000\n",
      "16:01:25 [DEBUG] train episode 204: reward = -145.11, steps = 1000\n",
      "16:01:41 [DEBUG] train episode 205: reward = -124.93, steps = 1000\n",
      "16:01:56 [DEBUG] train episode 206: reward = -74.29, steps = 1000\n",
      "16:02:12 [DEBUG] train episode 207: reward = -33.20, steps = 1000\n",
      "16:02:28 [DEBUG] train episode 208: reward = -89.19, steps = 1000\n",
      "16:02:44 [DEBUG] train episode 209: reward = -67.87, steps = 1000\n",
      "16:03:00 [DEBUG] train episode 210: reward = -90.71, steps = 1000\n",
      "16:03:16 [DEBUG] train episode 211: reward = -110.03, steps = 1000\n",
      "16:03:32 [DEBUG] train episode 212: reward = -66.46, steps = 1000\n",
      "16:03:49 [DEBUG] train episode 213: reward = -98.37, steps = 1000\n",
      "16:04:05 [DEBUG] train episode 214: reward = -56.01, steps = 1000\n",
      "16:04:21 [DEBUG] train episode 215: reward = -89.63, steps = 1000\n",
      "16:04:38 [DEBUG] train episode 216: reward = -84.69, steps = 1000\n",
      "16:04:54 [DEBUG] train episode 217: reward = -67.74, steps = 1000\n",
      "16:05:11 [DEBUG] train episode 218: reward = -105.08, steps = 1000\n",
      "16:05:12 [DEBUG] train episode 219: reward = -91.82, steps = 52\n",
      "16:05:28 [DEBUG] train episode 220: reward = -55.65, steps = 1000\n",
      "16:05:43 [DEBUG] train episode 221: reward = -86.73, steps = 1000\n",
      "16:05:58 [DEBUG] train episode 222: reward = -45.91, steps = 1000\n",
      "16:06:16 [DEBUG] train episode 223: reward = -49.99, steps = 1000\n",
      "16:06:32 [DEBUG] train episode 224: reward = -32.55, steps = 1000\n",
      "16:06:49 [DEBUG] train episode 225: reward = -78.87, steps = 1000\n",
      "16:07:06 [DEBUG] train episode 226: reward = -44.40, steps = 1000\n",
      "16:07:22 [DEBUG] train episode 227: reward = -54.29, steps = 1000\n",
      "16:07:38 [DEBUG] train episode 228: reward = -82.28, steps = 1000\n",
      "16:07:54 [DEBUG] train episode 229: reward = -68.36, steps = 1000\n",
      "16:08:09 [DEBUG] train episode 230: reward = -26.31, steps = 1000\n",
      "16:08:24 [DEBUG] train episode 231: reward = -86.26, steps = 1000\n",
      "16:08:40 [DEBUG] train episode 232: reward = -46.15, steps = 1000\n",
      "16:08:56 [DEBUG] train episode 233: reward = -155.39, steps = 1000\n",
      "16:09:12 [DEBUG] train episode 234: reward = -41.64, steps = 1000\n",
      "16:09:29 [DEBUG] train episode 235: reward = -88.05, steps = 1000\n",
      "16:09:46 [DEBUG] train episode 236: reward = -87.17, steps = 1000\n",
      "16:10:01 [DEBUG] train episode 237: reward = -168.74, steps = 991\n",
      "16:10:14 [DEBUG] train episode 238: reward = -159.35, steps = 821\n",
      "16:10:31 [DEBUG] train episode 239: reward = -89.67, steps = 1000\n",
      "16:10:47 [DEBUG] train episode 240: reward = -229.81, steps = 944\n",
      "16:11:05 [DEBUG] train episode 241: reward = -48.93, steps = 1000\n",
      "16:11:20 [DEBUG] train episode 242: reward = -30.57, steps = 1000\n",
      "16:11:38 [DEBUG] train episode 243: reward = -98.84, steps = 1000\n",
      "16:11:54 [DEBUG] train episode 244: reward = -59.22, steps = 1000\n",
      "16:12:11 [DEBUG] train episode 245: reward = -47.49, steps = 1000\n",
      "16:12:29 [DEBUG] train episode 246: reward = -49.54, steps = 1000\n",
      "16:12:45 [DEBUG] train episode 247: reward = -55.58, steps = 1000\n",
      "16:13:01 [DEBUG] train episode 248: reward = -261.72, steps = 969\n",
      "16:13:19 [DEBUG] train episode 249: reward = -40.42, steps = 1000\n",
      "16:13:35 [DEBUG] train episode 250: reward = -32.94, steps = 1000\n",
      "16:13:52 [DEBUG] train episode 251: reward = -24.56, steps = 1000\n",
      "16:14:08 [DEBUG] train episode 252: reward = -16.88, steps = 1000\n",
      "16:14:26 [DEBUG] train episode 253: reward = -56.63, steps = 1000\n",
      "16:14:42 [DEBUG] train episode 254: reward = -38.38, steps = 1000\n",
      "16:14:59 [DEBUG] train episode 255: reward = -54.97, steps = 1000\n",
      "16:15:17 [DEBUG] train episode 256: reward = -79.20, steps = 1000\n",
      "16:15:35 [DEBUG] train episode 257: reward = -76.28, steps = 1000\n",
      "16:15:52 [DEBUG] train episode 258: reward = -59.60, steps = 1000\n",
      "16:16:09 [DEBUG] train episode 259: reward = -35.59, steps = 1000\n",
      "16:16:27 [DEBUG] train episode 260: reward = -51.68, steps = 1000\n",
      "16:16:44 [DEBUG] train episode 261: reward = -51.66, steps = 1000\n",
      "16:17:01 [DEBUG] train episode 262: reward = -32.21, steps = 1000\n",
      "16:17:19 [DEBUG] train episode 263: reward = -74.43, steps = 1000\n",
      "16:17:36 [DEBUG] train episode 264: reward = -60.47, steps = 1000\n",
      "16:17:54 [DEBUG] train episode 265: reward = -34.44, steps = 1000\n",
      "16:18:11 [DEBUG] train episode 266: reward = -17.53, steps = 1000\n",
      "16:18:28 [DEBUG] train episode 267: reward = -50.44, steps = 1000\n",
      "16:18:47 [DEBUG] train episode 268: reward = -31.32, steps = 1000\n",
      "16:19:05 [DEBUG] train episode 269: reward = -74.84, steps = 1000\n",
      "16:19:23 [DEBUG] train episode 270: reward = -71.76, steps = 1000\n",
      "16:19:42 [DEBUG] train episode 271: reward = -26.28, steps = 1000\n",
      "16:20:00 [DEBUG] train episode 272: reward = -78.65, steps = 1000\n",
      "16:20:19 [DEBUG] train episode 273: reward = -54.83, steps = 1000\n",
      "16:20:36 [DEBUG] train episode 274: reward = -20.61, steps = 1000\n",
      "16:20:53 [DEBUG] train episode 275: reward = -57.88, steps = 1000\n",
      "16:21:12 [DEBUG] train episode 276: reward = -44.64, steps = 1000\n",
      "16:21:28 [DEBUG] train episode 277: reward = -27.04, steps = 1000\n",
      "16:21:45 [DEBUG] train episode 278: reward = -39.71, steps = 1000\n",
      "16:22:05 [DEBUG] train episode 279: reward = -55.80, steps = 1000\n",
      "16:22:23 [DEBUG] train episode 280: reward = -67.25, steps = 1000\n",
      "16:22:39 [DEBUG] train episode 281: reward = -33.43, steps = 1000\n",
      "16:22:59 [DEBUG] train episode 282: reward = -36.89, steps = 1000\n",
      "16:23:17 [DEBUG] train episode 283: reward = -109.18, steps = 1000\n",
      "16:23:23 [DEBUG] train episode 284: reward = -637.89, steps = 368\n",
      "16:23:41 [DEBUG] train episode 285: reward = -56.64, steps = 1000\n",
      "16:23:58 [DEBUG] train episode 286: reward = -60.65, steps = 1000\n",
      "16:24:14 [DEBUG] train episode 287: reward = -23.12, steps = 1000\n",
      "16:24:31 [DEBUG] train episode 288: reward = -67.32, steps = 1000\n",
      "16:24:46 [DEBUG] train episode 289: reward = -70.14, steps = 1000\n",
      "16:25:03 [DEBUG] train episode 290: reward = -11.99, steps = 1000\n",
      "16:25:21 [DEBUG] train episode 291: reward = -82.50, steps = 1000\n",
      "16:25:38 [DEBUG] train episode 292: reward = -24.19, steps = 1000\n",
      "16:25:55 [DEBUG] train episode 293: reward = -75.30, steps = 1000\n",
      "16:26:11 [DEBUG] train episode 294: reward = -55.99, steps = 1000\n",
      "16:26:27 [DEBUG] train episode 295: reward = 10.52, steps = 1000\n",
      "16:26:43 [DEBUG] train episode 296: reward = -49.16, steps = 1000\n",
      "16:27:02 [DEBUG] train episode 297: reward = -57.18, steps = 1000\n",
      "16:27:18 [DEBUG] train episode 298: reward = -35.64, steps = 1000\n",
      "16:27:35 [DEBUG] train episode 299: reward = -661.19, steps = 1000\n",
      "16:27:52 [DEBUG] train episode 300: reward = -66.01, steps = 1000\n",
      "16:27:52 [DEBUG] train episode 301: reward = -109.61, steps = 53\n",
      "16:28:08 [DEBUG] train episode 302: reward = -184.58, steps = 1000\n",
      "16:28:24 [DEBUG] train episode 303: reward = -61.92, steps = 1000\n",
      "16:28:41 [DEBUG] train episode 304: reward = -71.44, steps = 1000\n",
      "16:28:43 [DEBUG] train episode 305: reward = -105.97, steps = 180\n",
      "16:29:00 [DEBUG] train episode 306: reward = -5.41, steps = 1000\n",
      "16:29:16 [DEBUG] train episode 307: reward = -89.25, steps = 1000\n",
      "16:29:33 [DEBUG] train episode 308: reward = -80.65, steps = 1000\n",
      "16:29:50 [DEBUG] train episode 309: reward = -35.80, steps = 1000\n",
      "16:30:07 [DEBUG] train episode 310: reward = -41.52, steps = 1000\n",
      "16:30:23 [DEBUG] train episode 311: reward = -82.70, steps = 1000\n",
      "16:30:25 [DEBUG] train episode 312: reward = -120.78, steps = 131\n",
      "16:30:30 [DEBUG] train episode 313: reward = -174.65, steps = 297\n",
      "16:30:32 [DEBUG] train episode 314: reward = -60.49, steps = 185\n",
      "16:30:48 [DEBUG] train episode 315: reward = -33.68, steps = 1000\n",
      "16:30:50 [DEBUG] train episode 316: reward = -327.84, steps = 130\n",
      "16:30:52 [DEBUG] train episode 317: reward = -368.02, steps = 134\n",
      "16:30:55 [DEBUG] train episode 318: reward = -374.26, steps = 207\n",
      "16:30:57 [DEBUG] train episode 319: reward = -258.82, steps = 108\n",
      "16:31:13 [DEBUG] train episode 320: reward = -31.46, steps = 1000\n",
      "16:31:30 [DEBUG] train episode 321: reward = -45.66, steps = 1000\n",
      "16:31:46 [DEBUG] train episode 322: reward = -34.57, steps = 1000\n",
      "16:31:52 [DEBUG] train episode 323: reward = -209.25, steps = 403\n",
      "16:32:08 [DEBUG] train episode 324: reward = -39.94, steps = 1000\n",
      "16:32:11 [DEBUG] train episode 325: reward = -135.46, steps = 163\n",
      "16:32:27 [DEBUG] train episode 326: reward = -10.92, steps = 1000\n",
      "16:32:43 [DEBUG] train episode 327: reward = -45.30, steps = 1000\n",
      "16:33:00 [DEBUG] train episode 328: reward = -51.49, steps = 1000\n",
      "16:33:09 [DEBUG] train episode 329: reward = -22.21, steps = 524\n",
      "16:33:25 [DEBUG] train episode 330: reward = -32.08, steps = 1000\n",
      "16:33:32 [DEBUG] train episode 331: reward = -189.42, steps = 447\n",
      "16:33:35 [DEBUG] train episode 332: reward = -78.06, steps = 209\n",
      "16:33:42 [DEBUG] train episode 333: reward = -208.59, steps = 487\n",
      "16:33:59 [DEBUG] train episode 334: reward = -12.92, steps = 1000\n",
      "16:34:15 [DEBUG] train episode 335: reward = -26.31, steps = 1000\n",
      "16:34:32 [DEBUG] train episode 336: reward = -78.34, steps = 1000\n",
      "16:34:48 [DEBUG] train episode 337: reward = 21.48, steps = 1000\n",
      "16:35:06 [DEBUG] train episode 338: reward = -58.45, steps = 1000\n",
      "16:35:22 [DEBUG] train episode 339: reward = 11.60, steps = 1000\n",
      "16:35:38 [DEBUG] train episode 340: reward = 10.61, steps = 1000\n",
      "16:35:53 [DEBUG] train episode 341: reward = 3.54, steps = 1000\n",
      "16:36:09 [DEBUG] train episode 342: reward = 15.60, steps = 1000\n",
      "16:36:09 [DEBUG] train episode 343: reward = -119.59, steps = 52\n",
      "16:36:24 [DEBUG] train episode 344: reward = -32.67, steps = 1000\n",
      "16:36:40 [DEBUG] train episode 345: reward = 10.68, steps = 1000\n",
      "16:36:57 [DEBUG] train episode 346: reward = -35.37, steps = 1000\n",
      "16:37:12 [DEBUG] train episode 347: reward = 9.28, steps = 1000\n",
      "16:37:28 [DEBUG] train episode 348: reward = -22.10, steps = 1000\n",
      "16:37:44 [DEBUG] train episode 349: reward = 24.38, steps = 1000\n",
      "16:38:00 [DEBUG] train episode 350: reward = 8.58, steps = 1000\n",
      "16:38:17 [DEBUG] train episode 351: reward = 1.40, steps = 1000\n",
      "16:38:35 [DEBUG] train episode 352: reward = -51.02, steps = 1000\n",
      "16:38:52 [DEBUG] train episode 353: reward = -68.66, steps = 1000\n",
      "16:39:08 [DEBUG] train episode 354: reward = 13.89, steps = 1000\n",
      "16:39:24 [DEBUG] train episode 355: reward = 8.85, steps = 1000\n",
      "16:39:40 [DEBUG] train episode 356: reward = -37.26, steps = 1000\n",
      "16:39:56 [DEBUG] train episode 357: reward = 12.02, steps = 1000\n",
      "16:40:12 [DEBUG] train episode 358: reward = -47.81, steps = 1000\n",
      "16:40:28 [DEBUG] train episode 359: reward = -21.64, steps = 1000\n",
      "16:40:45 [DEBUG] train episode 360: reward = 4.33, steps = 1000\n",
      "16:41:02 [DEBUG] train episode 361: reward = -14.15, steps = 1000\n",
      "16:41:18 [DEBUG] train episode 362: reward = -0.50, steps = 1000\n",
      "16:41:35 [DEBUG] train episode 363: reward = -32.45, steps = 1000\n",
      "16:41:52 [DEBUG] train episode 364: reward = -44.04, steps = 1000\n",
      "16:42:08 [DEBUG] train episode 365: reward = -7.90, steps = 1000\n",
      "16:42:26 [DEBUG] train episode 366: reward = -42.57, steps = 1000\n",
      "16:42:42 [DEBUG] train episode 367: reward = -22.23, steps = 1000\n",
      "16:43:00 [DEBUG] train episode 368: reward = -26.27, steps = 1000\n",
      "16:43:16 [DEBUG] train episode 369: reward = -16.67, steps = 1000\n",
      "16:43:34 [DEBUG] train episode 370: reward = -12.73, steps = 1000\n",
      "16:43:51 [DEBUG] train episode 371: reward = -13.88, steps = 1000\n",
      "16:44:08 [DEBUG] train episode 372: reward = -13.60, steps = 1000\n",
      "16:44:24 [DEBUG] train episode 373: reward = 20.07, steps = 1000\n",
      "16:44:43 [DEBUG] train episode 374: reward = -39.98, steps = 1000\n",
      "16:44:59 [DEBUG] train episode 375: reward = 1.83, steps = 1000\n",
      "16:45:16 [DEBUG] train episode 376: reward = 8.48, steps = 1000\n",
      "16:45:32 [DEBUG] train episode 377: reward = -35.34, steps = 1000\n",
      "16:45:49 [DEBUG] train episode 378: reward = -15.22, steps = 1000\n",
      "16:46:05 [DEBUG] train episode 379: reward = -11.67, steps = 1000\n",
      "16:46:07 [DEBUG] train episode 380: reward = -91.93, steps = 135\n",
      "16:46:23 [DEBUG] train episode 381: reward = 3.62, steps = 1000\n",
      "16:46:39 [DEBUG] train episode 382: reward = -17.76, steps = 1000\n",
      "16:46:56 [DEBUG] train episode 383: reward = -45.99, steps = 1000\n",
      "16:47:13 [DEBUG] train episode 384: reward = 14.77, steps = 1000\n",
      "16:47:29 [DEBUG] train episode 385: reward = 7.12, steps = 1000\n",
      "16:47:36 [DEBUG] train episode 386: reward = -497.05, steps = 434\n",
      "16:47:52 [DEBUG] train episode 387: reward = -8.96, steps = 1000\n",
      "16:48:10 [DEBUG] train episode 388: reward = -62.27, steps = 1000\n",
      "16:48:26 [DEBUG] train episode 389: reward = -9.82, steps = 1000\n",
      "16:48:43 [DEBUG] train episode 390: reward = -13.88, steps = 1000\n",
      "16:48:59 [DEBUG] train episode 391: reward = -50.66, steps = 1000\n",
      "16:49:17 [DEBUG] train episode 392: reward = 20.18, steps = 1000\n",
      "16:49:33 [DEBUG] train episode 393: reward = -36.30, steps = 1000\n",
      "16:49:51 [DEBUG] train episode 394: reward = -6.69, steps = 1000\n",
      "16:50:08 [DEBUG] train episode 395: reward = -41.34, steps = 1000\n",
      "16:50:24 [DEBUG] train episode 396: reward = 10.24, steps = 1000\n",
      "16:50:41 [DEBUG] train episode 397: reward = -3.72, steps = 1000\n",
      "16:50:59 [DEBUG] train episode 398: reward = 9.63, steps = 1000\n",
      "16:51:15 [DEBUG] train episode 399: reward = 3.34, steps = 1000\n",
      "16:51:33 [DEBUG] train episode 400: reward = -26.98, steps = 1000\n",
      "16:51:50 [DEBUG] train episode 401: reward = -10.55, steps = 1000\n",
      "16:52:08 [DEBUG] train episode 402: reward = -61.39, steps = 1000\n",
      "16:52:25 [DEBUG] train episode 403: reward = -3.59, steps = 1000\n",
      "16:52:41 [DEBUG] train episode 404: reward = -11.97, steps = 1000\n",
      "16:52:59 [DEBUG] train episode 405: reward = 18.61, steps = 1000\n",
      "16:53:15 [DEBUG] train episode 406: reward = -3.39, steps = 1000\n",
      "16:53:34 [DEBUG] train episode 407: reward = -15.20, steps = 1000\n",
      "16:53:51 [DEBUG] train episode 408: reward = -45.63, steps = 1000\n",
      "16:54:07 [DEBUG] train episode 409: reward = -18.62, steps = 1000\n",
      "16:54:24 [DEBUG] train episode 410: reward = 5.25, steps = 1000\n",
      "16:54:41 [DEBUG] train episode 411: reward = -11.82, steps = 1000\n",
      "16:54:58 [DEBUG] train episode 412: reward = 8.86, steps = 1000\n",
      "16:55:15 [DEBUG] train episode 413: reward = 1.93, steps = 1000\n",
      "16:55:33 [DEBUG] train episode 414: reward = -2.19, steps = 1000\n",
      "16:55:50 [DEBUG] train episode 415: reward = -9.91, steps = 1000\n",
      "16:56:08 [DEBUG] train episode 416: reward = -13.03, steps = 1000\n",
      "16:56:25 [DEBUG] train episode 417: reward = -60.89, steps = 1000\n",
      "16:56:42 [DEBUG] train episode 418: reward = -30.01, steps = 1000\n",
      "16:57:00 [DEBUG] train episode 419: reward = -22.01, steps = 1000\n",
      "16:57:17 [DEBUG] train episode 420: reward = -30.74, steps = 1000\n",
      "16:57:35 [DEBUG] train episode 421: reward = 25.21, steps = 1000\n",
      "16:57:52 [DEBUG] train episode 422: reward = -28.84, steps = 1000\n",
      "16:58:09 [DEBUG] train episode 423: reward = -16.14, steps = 1000\n",
      "16:58:26 [DEBUG] train episode 424: reward = -12.96, steps = 1000\n",
      "16:58:43 [DEBUG] train episode 425: reward = -6.68, steps = 1000\n",
      "16:59:00 [DEBUG] train episode 426: reward = -6.48, steps = 1000\n",
      "16:59:18 [DEBUG] train episode 427: reward = 20.64, steps = 1000\n",
      "16:59:35 [DEBUG] train episode 428: reward = -36.57, steps = 1000\n",
      "16:59:46 [DEBUG] train episode 429: reward = 179.74, steps = 641\n",
      "16:59:59 [DEBUG] train episode 430: reward = -258.31, steps = 750\n",
      "17:00:16 [DEBUG] train episode 431: reward = -37.56, steps = 1000\n",
      "17:00:33 [DEBUG] train episode 432: reward = -9.76, steps = 1000\n",
      "17:00:45 [DEBUG] train episode 433: reward = -380.50, steps = 759\n",
      "17:01:02 [DEBUG] train episode 434: reward = -113.16, steps = 1000\n",
      "17:01:11 [DEBUG] train episode 435: reward = 192.68, steps = 634\n",
      "17:01:28 [DEBUG] train episode 436: reward = -16.30, steps = 1000\n",
      "17:01:37 [DEBUG] train episode 437: reward = 225.65, steps = 604\n",
      "17:01:48 [DEBUG] train episode 438: reward = -92.96, steps = 670\n",
      "17:02:06 [DEBUG] train episode 439: reward = 0.91, steps = 1000\n",
      "17:02:14 [DEBUG] train episode 440: reward = -199.07, steps = 533\n",
      "17:02:27 [DEBUG] train episode 441: reward = 148.87, steps = 734\n",
      "17:02:41 [DEBUG] train episode 442: reward = 123.26, steps = 890\n",
      "17:02:58 [DEBUG] train episode 443: reward = -81.29, steps = 1000\n",
      "17:03:04 [DEBUG] train episode 444: reward = 197.81, steps = 401\n",
      "17:03:21 [DEBUG] train episode 445: reward = 12.10, steps = 1000\n",
      "17:03:32 [DEBUG] train episode 446: reward = 171.38, steps = 632\n",
      "17:03:48 [DEBUG] train episode 447: reward = 0.39, steps = 1000\n",
      "17:03:56 [DEBUG] train episode 448: reward = -50.86, steps = 515\n",
      "17:04:12 [DEBUG] train episode 449: reward = -50.66, steps = 1000\n",
      "17:04:25 [DEBUG] train episode 450: reward = -119.16, steps = 793\n",
      "17:04:36 [DEBUG] train episode 451: reward = -162.38, steps = 723\n",
      "17:04:47 [DEBUG] train episode 452: reward = 106.41, steps = 674\n",
      "17:05:04 [DEBUG] train episode 453: reward = 18.17, steps = 1000\n",
      "17:05:21 [DEBUG] train episode 454: reward = -76.30, steps = 1000\n",
      "17:05:37 [DEBUG] train episode 455: reward = -228.40, steps = 963\n",
      "17:05:54 [DEBUG] train episode 456: reward = -4.35, steps = 1000\n",
      "17:06:11 [DEBUG] train episode 457: reward = 118.99, steps = 997\n",
      "17:06:23 [DEBUG] train episode 458: reward = 171.42, steps = 770\n",
      "17:06:36 [DEBUG] train episode 459: reward = 170.09, steps = 784\n",
      "17:06:54 [DEBUG] train episode 460: reward = -48.00, steps = 1000\n",
      "17:07:01 [DEBUG] train episode 461: reward = 161.24, steps = 483\n",
      "17:07:08 [DEBUG] train episode 462: reward = -1011.54, steps = 434\n",
      "17:07:24 [DEBUG] train episode 463: reward = 27.88, steps = 1000\n",
      "17:07:40 [DEBUG] train episode 464: reward = -55.39, steps = 1000\n",
      "17:07:57 [DEBUG] train episode 465: reward = -11.84, steps = 1000\n",
      "17:08:13 [DEBUG] train episode 466: reward = -16.58, steps = 1000\n",
      "17:08:31 [DEBUG] train episode 467: reward = -48.79, steps = 1000\n",
      "17:08:44 [DEBUG] train episode 468: reward = 75.98, steps = 803\n",
      "17:09:01 [DEBUG] train episode 469: reward = 73.27, steps = 984\n",
      "17:09:18 [DEBUG] train episode 470: reward = 17.57, steps = 1000\n",
      "17:09:35 [DEBUG] train episode 471: reward = -40.56, steps = 1000\n",
      "17:09:46 [DEBUG] train episode 472: reward = 153.79, steps = 692\n",
      "17:10:03 [DEBUG] train episode 473: reward = -31.45, steps = 1000\n",
      "17:10:21 [DEBUG] train episode 474: reward = -11.71, steps = 1000\n",
      "17:10:31 [DEBUG] train episode 475: reward = 173.09, steps = 627\n",
      "17:10:47 [DEBUG] train episode 476: reward = -41.60, steps = 1000\n",
      "17:10:56 [DEBUG] train episode 477: reward = 189.14, steps = 554\n",
      "17:11:12 [DEBUG] train episode 478: reward = -28.12, steps = 1000\n",
      "17:11:29 [DEBUG] train episode 479: reward = 33.71, steps = 1000\n",
      "17:11:37 [DEBUG] train episode 480: reward = 248.13, steps = 464\n",
      "17:11:47 [DEBUG] train episode 481: reward = -178.47, steps = 601\n",
      "17:12:04 [DEBUG] train episode 482: reward = 34.76, steps = 1000\n",
      "17:12:10 [DEBUG] train episode 483: reward = 222.54, steps = 386\n",
      "17:12:27 [DEBUG] train episode 484: reward = -5.07, steps = 1000\n",
      "17:12:34 [DEBUG] train episode 485: reward = 237.54, steps = 418\n",
      "17:12:51 [DEBUG] train episode 486: reward = -422.23, steps = 986\n",
      "17:13:09 [DEBUG] train episode 487: reward = 0.66, steps = 1000\n",
      "17:13:25 [DEBUG] train episode 488: reward = 22.06, steps = 1000\n",
      "17:13:42 [DEBUG] train episode 489: reward = 4.54, steps = 1000\n",
      "17:13:59 [DEBUG] train episode 490: reward = -49.61, steps = 1000\n",
      "17:14:16 [DEBUG] train episode 491: reward = -24.64, steps = 1000\n",
      "17:14:23 [DEBUG] train episode 492: reward = 227.28, steps = 411\n",
      "17:14:40 [DEBUG] train episode 493: reward = -62.41, steps = 1000\n",
      "17:14:53 [DEBUG] train episode 494: reward = 140.25, steps = 790\n",
      "17:15:10 [DEBUG] train episode 495: reward = 7.08, steps = 1000\n",
      "17:15:28 [DEBUG] train episode 496: reward = 37.71, steps = 1000\n",
      "17:15:45 [DEBUG] train episode 497: reward = 2.57, steps = 1000\n",
      "17:15:51 [DEBUG] train episode 498: reward = 238.87, steps = 400\n",
      "17:16:08 [DEBUG] train episode 499: reward = -4.74, steps = 1000\n",
      "17:16:17 [DEBUG] train episode 500: reward = 215.58, steps = 544\n",
      "17:16:34 [DEBUG] train episode 501: reward = 26.46, steps = 1000\n",
      "17:16:51 [DEBUG] train episode 502: reward = 20.71, steps = 1000\n",
      "17:17:09 [DEBUG] train episode 503: reward = 23.39, steps = 1000\n",
      "17:17:26 [DEBUG] train episode 504: reward = 10.38, steps = 1000\n",
      "17:17:35 [DEBUG] train episode 505: reward = 212.92, steps = 620\n",
      "17:17:42 [DEBUG] train episode 506: reward = 214.52, steps = 421\n",
      "17:17:47 [DEBUG] train episode 507: reward = -604.26, steps = 304\n",
      "17:17:51 [DEBUG] train episode 508: reward = 220.46, steps = 304\n",
      "17:18:07 [DEBUG] train episode 509: reward = 19.86, steps = 1000\n",
      "17:18:24 [DEBUG] train episode 510: reward = 69.63, steps = 1000\n",
      "17:18:42 [DEBUG] train episode 511: reward = 39.31, steps = 1000\n",
      "17:18:53 [DEBUG] train episode 512: reward = 177.56, steps = 684\n",
      "17:18:55 [DEBUG] train episode 513: reward = -302.84, steps = 152\n",
      "17:19:12 [DEBUG] train episode 514: reward = 18.61, steps = 1000\n",
      "17:19:14 [DEBUG] train episode 515: reward = -466.55, steps = 125\n",
      "17:19:20 [DEBUG] train episode 516: reward = -415.81, steps = 408\n",
      "17:19:23 [DEBUG] train episode 517: reward = -340.35, steps = 184\n",
      "17:19:38 [DEBUG] train episode 518: reward = -458.65, steps = 926\n",
      "17:19:42 [DEBUG] train episode 519: reward = 281.32, steps = 289\n",
      "17:19:59 [DEBUG] train episode 520: reward = 5.11, steps = 1000\n",
      "17:20:17 [DEBUG] train episode 521: reward = 39.16, steps = 1000\n",
      "17:20:33 [DEBUG] train episode 522: reward = -30.10, steps = 1000\n",
      "17:20:50 [DEBUG] train episode 523: reward = 21.39, steps = 1000\n",
      "17:21:07 [DEBUG] train episode 524: reward = 26.32, steps = 1000\n",
      "17:21:09 [DEBUG] train episode 525: reward = -57.20, steps = 135\n",
      "17:21:16 [DEBUG] train episode 526: reward = -432.65, steps = 437\n",
      "17:21:18 [DEBUG] train episode 527: reward = -295.68, steps = 102\n",
      "17:21:19 [DEBUG] train episode 528: reward = -414.96, steps = 108\n",
      "17:21:24 [DEBUG] train episode 529: reward = 236.73, steps = 304\n",
      "17:21:41 [DEBUG] train episode 530: reward = -12.81, steps = 1000\n",
      "17:21:44 [DEBUG] train episode 531: reward = -136.83, steps = 200\n",
      "17:22:01 [DEBUG] train episode 532: reward = 126.82, steps = 932\n",
      "17:22:19 [DEBUG] train episode 533: reward = -967.64, steps = 999\n",
      "17:22:36 [DEBUG] train episode 534: reward = 64.86, steps = 1000\n",
      "17:22:53 [DEBUG] train episode 535: reward = 85.45, steps = 1000\n",
      "17:23:09 [DEBUG] train episode 536: reward = -13.24, steps = 1000\n",
      "17:23:27 [DEBUG] train episode 537: reward = 41.51, steps = 1000\n",
      "17:23:44 [DEBUG] train episode 538: reward = 35.68, steps = 1000\n",
      "17:24:01 [DEBUG] train episode 539: reward = -11.86, steps = 1000\n",
      "17:24:09 [DEBUG] train episode 540: reward = -549.14, steps = 509\n",
      "17:24:10 [DEBUG] train episode 541: reward = -53.61, steps = 74\n",
      "17:24:28 [DEBUG] train episode 542: reward = -25.94, steps = 1000\n",
      "17:24:45 [DEBUG] train episode 543: reward = -13.76, steps = 1000\n",
      "17:24:52 [DEBUG] train episode 544: reward = -610.65, steps = 391\n",
      "17:25:07 [DEBUG] train episode 545: reward = -133.54, steps = 905\n",
      "17:25:25 [DEBUG] train episode 546: reward = -2.68, steps = 1000\n",
      "17:25:42 [DEBUG] train episode 547: reward = 32.10, steps = 1000\n",
      "17:25:43 [DEBUG] train episode 548: reward = 22.49, steps = 96\n",
      "17:25:45 [DEBUG] train episode 549: reward = 33.30, steps = 102\n",
      "17:26:02 [DEBUG] train episode 550: reward = -39.32, steps = 1000\n",
      "17:26:19 [DEBUG] train episode 551: reward = -37.75, steps = 1000\n",
      "17:26:38 [DEBUG] train episode 552: reward = 30.53, steps = 1000\n",
      "17:26:55 [DEBUG] train episode 553: reward = -15.91, steps = 1000\n",
      "17:27:12 [DEBUG] train episode 554: reward = -4.25, steps = 1000\n",
      "17:27:29 [DEBUG] train episode 555: reward = -6.59, steps = 1000\n",
      "17:27:46 [DEBUG] train episode 556: reward = 28.12, steps = 1000\n",
      "17:28:03 [DEBUG] train episode 557: reward = 41.68, steps = 1000\n",
      "17:28:20 [DEBUG] train episode 558: reward = 71.26, steps = 1000\n",
      "17:28:37 [DEBUG] train episode 559: reward = 22.88, steps = 1000\n",
      "17:28:52 [DEBUG] train episode 560: reward = 195.52, steps = 892\n",
      "17:29:09 [DEBUG] train episode 561: reward = 24.32, steps = 1000\n",
      "17:29:26 [DEBUG] train episode 562: reward = -19.14, steps = 1000\n",
      "17:29:44 [DEBUG] train episode 563: reward = 56.24, steps = 1000\n",
      "17:30:01 [DEBUG] train episode 564: reward = 85.08, steps = 1000\n",
      "17:30:15 [DEBUG] train episode 565: reward = 174.13, steps = 824\n",
      "17:30:17 [DEBUG] train episode 566: reward = -178.52, steps = 168\n",
      "17:30:35 [DEBUG] train episode 567: reward = -2.73, steps = 1000\n",
      "17:30:52 [DEBUG] train episode 568: reward = 37.31, steps = 1000\n",
      "17:30:59 [DEBUG] train episode 569: reward = 215.90, steps = 448\n",
      "17:31:16 [DEBUG] train episode 570: reward = -5.80, steps = 1000\n",
      "17:31:17 [DEBUG] train episode 571: reward = -51.08, steps = 84\n",
      "17:31:34 [DEBUG] train episode 572: reward = 37.96, steps = 1000\n",
      "17:31:52 [DEBUG] train episode 573: reward = 38.76, steps = 1000\n",
      "17:32:08 [DEBUG] train episode 574: reward = 12.21, steps = 1000\n",
      "17:32:10 [DEBUG] train episode 575: reward = -82.36, steps = 96\n",
      "17:32:16 [DEBUG] train episode 576: reward = 217.38, steps = 405\n",
      "17:32:25 [DEBUG] train episode 577: reward = 203.25, steps = 565\n",
      "17:32:36 [DEBUG] train episode 578: reward = 221.50, steps = 640\n",
      "17:32:38 [DEBUG] train episode 579: reward = 76.11, steps = 159\n",
      "17:32:55 [DEBUG] train episode 580: reward = 132.48, steps = 977\n",
      "17:33:05 [DEBUG] train episode 581: reward = 219.43, steps = 614\n",
      "17:33:21 [DEBUG] train episode 582: reward = -9.71, steps = 1000\n",
      "17:33:27 [DEBUG] train episode 583: reward = 240.53, steps = 365\n",
      "17:33:44 [DEBUG] train episode 584: reward = 2.31, steps = 1000\n",
      "17:33:45 [DEBUG] train episode 585: reward = 32.22, steps = 89\n",
      "17:33:47 [DEBUG] train episode 586: reward = -27.33, steps = 86\n",
      "17:33:51 [DEBUG] train episode 587: reward = 258.94, steps = 308\n",
      "17:34:00 [DEBUG] train episode 588: reward = 241.40, steps = 566\n",
      "17:34:18 [DEBUG] train episode 589: reward = 59.84, steps = 1000\n",
      "17:34:20 [DEBUG] train episode 590: reward = -59.60, steps = 108\n",
      "17:34:21 [DEBUG] train episode 591: reward = -34.47, steps = 57\n",
      "17:34:37 [DEBUG] train episode 592: reward = 31.24, steps = 1000\n",
      "17:34:51 [DEBUG] train episode 593: reward = 187.36, steps = 800\n",
      "17:35:03 [DEBUG] train episode 594: reward = 197.85, steps = 713\n",
      "17:35:21 [DEBUG] train episode 595: reward = 10.25, steps = 1000\n",
      "17:35:38 [DEBUG] train episode 596: reward = 17.86, steps = 1000\n",
      "17:35:55 [DEBUG] train episode 597: reward = 41.72, steps = 1000\n",
      "17:35:57 [DEBUG] train episode 598: reward = 2.79, steps = 121\n",
      "17:36:14 [DEBUG] train episode 599: reward = 53.77, steps = 1000\n",
      "17:36:31 [DEBUG] train episode 600: reward = -1.95, steps = 1000\n",
      "17:36:41 [DEBUG] train episode 601: reward = 207.04, steps = 608\n",
      "17:36:58 [DEBUG] train episode 602: reward = 42.99, steps = 1000\n",
      "17:37:16 [DEBUG] train episode 603: reward = 25.57, steps = 1000\n",
      "17:37:24 [DEBUG] train episode 604: reward = 202.82, steps = 509\n",
      "17:37:40 [DEBUG] train episode 605: reward = 45.00, steps = 1000\n",
      "17:37:58 [DEBUG] train episode 606: reward = 38.45, steps = 1000\n",
      "17:38:15 [DEBUG] train episode 607: reward = 41.12, steps = 1000\n",
      "17:38:16 [DEBUG] train episode 608: reward = -14.02, steps = 106\n",
      "17:38:33 [DEBUG] train episode 609: reward = 20.66, steps = 1000\n",
      "17:38:51 [DEBUG] train episode 610: reward = 21.48, steps = 1000\n",
      "17:39:04 [DEBUG] train episode 611: reward = 175.96, steps = 788\n",
      "17:39:21 [DEBUG] train episode 612: reward = 51.15, steps = 1000\n",
      "17:39:38 [DEBUG] train episode 613: reward = 1.04, steps = 1000\n",
      "17:39:55 [DEBUG] train episode 614: reward = 114.43, steps = 1000\n",
      "17:40:00 [DEBUG] train episode 615: reward = 238.75, steps = 321\n",
      "17:40:17 [DEBUG] train episode 616: reward = 105.67, steps = 1000\n",
      "17:40:33 [DEBUG] train episode 617: reward = 109.79, steps = 1000\n",
      "17:40:49 [DEBUG] train episode 618: reward = 176.95, steps = 990\n",
      "17:41:06 [DEBUG] train episode 619: reward = 54.18, steps = 1000\n",
      "17:41:17 [DEBUG] train episode 620: reward = 214.27, steps = 640\n",
      "17:41:23 [DEBUG] train episode 621: reward = 204.66, steps = 389\n",
      "17:41:25 [DEBUG] train episode 622: reward = 7.88, steps = 119\n",
      "17:41:41 [DEBUG] train episode 623: reward = 165.24, steps = 1000\n",
      "17:41:57 [DEBUG] train episode 624: reward = 167.14, steps = 1000\n",
      "17:41:58 [DEBUG] train episode 625: reward = 7.88, steps = 72\n",
      "17:42:00 [DEBUG] train episode 626: reward = -128.61, steps = 91\n",
      "17:42:01 [DEBUG] train episode 627: reward = -16.84, steps = 74\n",
      "17:42:17 [DEBUG] train episode 628: reward = 155.34, steps = 1000\n",
      "17:42:33 [DEBUG] train episode 629: reward = 100.81, steps = 1000\n",
      "17:42:35 [DEBUG] train episode 630: reward = -115.57, steps = 117\n",
      "17:42:51 [DEBUG] train episode 631: reward = 138.67, steps = 1000\n",
      "17:43:09 [DEBUG] train episode 632: reward = 14.39, steps = 1000\n",
      "17:43:26 [DEBUG] train episode 633: reward = 102.26, steps = 1000\n",
      "17:43:40 [DEBUG] train episode 634: reward = 181.22, steps = 861\n",
      "17:43:56 [DEBUG] train episode 635: reward = 164.01, steps = 1000\n",
      "17:44:01 [DEBUG] train episode 636: reward = 235.82, steps = 343\n",
      "17:44:17 [DEBUG] train episode 637: reward = 162.08, steps = 1000\n",
      "17:44:19 [DEBUG] train episode 638: reward = 10.27, steps = 128\n",
      "17:44:36 [DEBUG] train episode 639: reward = 117.35, steps = 1000\n",
      "17:44:38 [DEBUG] train episode 640: reward = 53.56, steps = 111\n",
      "17:44:47 [DEBUG] train episode 641: reward = 256.07, steps = 595\n",
      "17:45:06 [DEBUG] train episode 642: reward = 101.01, steps = 1000\n",
      "17:45:23 [DEBUG] train episode 643: reward = 122.99, steps = 1000\n",
      "17:45:25 [DEBUG] train episode 644: reward = 73.47, steps = 120\n",
      "17:45:42 [DEBUG] train episode 645: reward = 142.42, steps = 1000\n",
      "17:45:44 [DEBUG] train episode 646: reward = 66.60, steps = 104\n",
      "17:45:53 [DEBUG] train episode 647: reward = 220.20, steps = 564\n",
      "17:46:10 [DEBUG] train episode 648: reward = 127.84, steps = 1000\n",
      "17:46:26 [DEBUG] train episode 649: reward = 120.23, steps = 1000\n",
      "17:46:33 [DEBUG] train episode 650: reward = 223.96, steps = 461\n",
      "17:46:49 [DEBUG] train episode 651: reward = 154.65, steps = 1000\n",
      "17:47:06 [DEBUG] train episode 652: reward = 143.88, steps = 1000\n",
      "17:47:22 [DEBUG] train episode 653: reward = 150.57, steps = 1000\n",
      "17:47:38 [DEBUG] train episode 654: reward = 27.69, steps = 1000\n",
      "17:47:42 [DEBUG] train episode 655: reward = -43.86, steps = 229\n",
      "17:47:57 [DEBUG] train episode 656: reward = 89.57, steps = 1000\n",
      "17:48:14 [DEBUG] train episode 657: reward = 168.64, steps = 1000\n",
      "17:48:24 [DEBUG] train episode 658: reward = 207.00, steps = 642\n",
      "17:48:40 [DEBUG] train episode 659: reward = 182.23, steps = 1000\n",
      "17:48:57 [DEBUG] train episode 660: reward = 115.71, steps = 1000\n",
      "17:49:01 [DEBUG] train episode 661: reward = -160.52, steps = 281\n",
      "17:49:17 [DEBUG] train episode 662: reward = 157.85, steps = 1000\n",
      "17:49:33 [DEBUG] train episode 663: reward = 130.31, steps = 1000\n",
      "17:49:49 [DEBUG] train episode 664: reward = 130.32, steps = 1000\n",
      "17:49:56 [DEBUG] train episode 665: reward = 277.95, steps = 410\n",
      "17:50:12 [DEBUG] train episode 666: reward = 154.16, steps = 1000\n",
      "17:50:18 [DEBUG] train episode 667: reward = 196.92, steps = 423\n",
      "17:50:34 [DEBUG] train episode 668: reward = 135.52, steps = 1000\n",
      "17:50:50 [DEBUG] train episode 669: reward = 106.39, steps = 1000\n",
      "17:50:52 [DEBUG] train episode 670: reward = -24.90, steps = 104\n",
      "17:51:08 [DEBUG] train episode 671: reward = 154.71, steps = 1000\n",
      "17:51:15 [DEBUG] train episode 672: reward = 226.01, steps = 468\n",
      "17:51:31 [DEBUG] train episode 673: reward = 132.32, steps = 1000\n",
      "17:51:47 [DEBUG] train episode 674: reward = 138.69, steps = 1000\n",
      "17:52:00 [DEBUG] train episode 675: reward = 232.37, steps = 797\n",
      "17:52:13 [DEBUG] train episode 676: reward = 249.46, steps = 817\n",
      "17:52:29 [DEBUG] train episode 677: reward = 112.30, steps = 1000\n",
      "17:52:45 [DEBUG] train episode 678: reward = 104.20, steps = 1000\n",
      "17:52:51 [DEBUG] train episode 679: reward = -187.01, steps = 379\n",
      "17:53:08 [DEBUG] train episode 680: reward = 136.29, steps = 1000\n",
      "17:53:13 [DEBUG] train episode 681: reward = -486.16, steps = 360\n",
      "17:53:20 [DEBUG] train episode 682: reward = -749.46, steps = 446\n",
      "17:53:36 [DEBUG] train episode 683: reward = 109.68, steps = 1000\n",
      "17:53:52 [DEBUG] train episode 684: reward = 154.85, steps = 1000\n",
      "17:54:09 [DEBUG] train episode 685: reward = 156.93, steps = 1000\n",
      "17:54:12 [DEBUG] train episode 686: reward = -406.34, steps = 210\n",
      "17:54:17 [DEBUG] train episode 687: reward = -401.19, steps = 300\n",
      "17:54:21 [DEBUG] train episode 688: reward = -82.41, steps = 291\n",
      "17:54:37 [DEBUG] train episode 689: reward = 115.55, steps = 1000\n",
      "17:54:54 [DEBUG] train episode 690: reward = 121.94, steps = 1000\n",
      "17:55:02 [DEBUG] train episode 691: reward = 183.99, steps = 519\n",
      "17:55:19 [DEBUG] train episode 692: reward = 76.99, steps = 1000\n",
      "17:55:37 [DEBUG] train episode 693: reward = 57.91, steps = 1000\n",
      "17:55:53 [DEBUG] train episode 694: reward = 123.04, steps = 1000\n",
      "17:56:10 [DEBUG] train episode 695: reward = 38.86, steps = 1000\n",
      "17:56:25 [DEBUG] train episode 696: reward = 134.54, steps = 1000\n",
      "17:56:32 [DEBUG] train episode 697: reward = -130.50, steps = 430\n",
      "17:56:49 [DEBUG] train episode 698: reward = 124.60, steps = 1000\n",
      "17:56:53 [DEBUG] train episode 699: reward = 267.28, steps = 276\n",
      "17:56:59 [DEBUG] train episode 700: reward = 209.36, steps = 389\n",
      "17:57:15 [DEBUG] train episode 701: reward = 159.69, steps = 1000\n",
      "17:57:18 [DEBUG] train episode 702: reward = -7.29, steps = 189\n",
      "17:57:25 [DEBUG] train episode 703: reward = 222.08, steps = 413\n",
      "17:57:36 [DEBUG] train episode 704: reward = 232.52, steps = 686\n",
      "17:57:40 [DEBUG] train episode 705: reward = 10.55, steps = 215\n",
      "17:57:57 [DEBUG] train episode 706: reward = 142.65, steps = 1000\n",
      "17:58:14 [DEBUG] train episode 707: reward = 171.00, steps = 1000\n",
      "17:58:32 [DEBUG] train episode 708: reward = 128.44, steps = 1000\n",
      "17:58:36 [DEBUG] train episode 709: reward = 4.20, steps = 241\n",
      "17:58:56 [DEBUG] train episode 710: reward = -51.23, steps = 1000\n",
      "17:59:02 [DEBUG] train episode 711: reward = 244.43, steps = 382\n",
      "17:59:09 [DEBUG] train episode 712: reward = -198.80, steps = 386\n",
      "17:59:26 [DEBUG] train episode 713: reward = 120.89, steps = 1000\n",
      "17:59:43 [DEBUG] train episode 714: reward = 89.99, steps = 1000\n",
      "17:59:47 [DEBUG] train episode 715: reward = 217.96, steps = 308\n",
      "18:00:00 [DEBUG] train episode 716: reward = 127.40, steps = 741\n",
      "18:00:08 [DEBUG] train episode 717: reward = 256.36, steps = 516\n",
      "18:00:22 [DEBUG] train episode 718: reward = 177.99, steps = 759\n",
      "18:00:33 [DEBUG] train episode 719: reward = 247.88, steps = 732\n",
      "18:00:51 [DEBUG] train episode 720: reward = 130.01, steps = 1000\n",
      "18:00:54 [DEBUG] train episode 721: reward = -313.62, steps = 177\n",
      "18:01:04 [DEBUG] train episode 722: reward = 250.17, steps = 519\n",
      "18:01:24 [DEBUG] train episode 723: reward = 80.82, steps = 1000\n",
      "18:01:28 [DEBUG] train episode 724: reward = 40.78, steps = 188\n",
      "18:01:48 [DEBUG] train episode 725: reward = 99.39, steps = 1000\n",
      "18:02:06 [DEBUG] train episode 726: reward = 143.09, steps = 1000\n",
      "18:02:25 [DEBUG] train episode 727: reward = 153.73, steps = 1000\n",
      "18:02:42 [DEBUG] train episode 728: reward = 147.26, steps = 1000\n",
      "18:02:52 [DEBUG] train episode 729: reward = 238.17, steps = 618\n",
      "18:03:09 [DEBUG] train episode 730: reward = 79.20, steps = 1000\n",
      "18:03:27 [DEBUG] train episode 731: reward = 24.03, steps = 1000\n",
      "18:03:31 [DEBUG] train episode 732: reward = 242.16, steps = 235\n",
      "18:03:47 [DEBUG] train episode 733: reward = 78.30, steps = 1000\n",
      "18:04:03 [DEBUG] train episode 734: reward = 97.28, steps = 1000\n",
      "18:04:20 [DEBUG] train episode 735: reward = 90.98, steps = 1000\n",
      "18:04:38 [DEBUG] train episode 736: reward = 79.91, steps = 1000\n",
      "18:04:50 [DEBUG] train episode 737: reward = -159.03, steps = 692\n",
      "18:05:08 [DEBUG] train episode 738: reward = 85.60, steps = 1000\n",
      "18:05:10 [DEBUG] train episode 739: reward = 43.50, steps = 171\n",
      "18:05:13 [DEBUG] train episode 740: reward = 30.27, steps = 171\n",
      "18:05:16 [DEBUG] train episode 741: reward = -454.75, steps = 188\n",
      "18:05:17 [DEBUG] train episode 742: reward = -212.17, steps = 70\n",
      "18:05:33 [DEBUG] train episode 743: reward = 219.78, steps = 982\n",
      "18:05:50 [DEBUG] train episode 744: reward = 149.90, steps = 1000\n",
      "18:06:07 [DEBUG] train episode 745: reward = 128.33, steps = 1000\n",
      "18:06:15 [DEBUG] train episode 746: reward = 208.50, steps = 468\n",
      "18:06:20 [DEBUG] train episode 747: reward = -88.29, steps = 306\n",
      "18:06:23 [DEBUG] train episode 748: reward = 255.11, steps = 195\n",
      "18:06:36 [DEBUG] train episode 749: reward = 231.50, steps = 765\n",
      "18:06:38 [DEBUG] train episode 750: reward = 33.01, steps = 145\n",
      "18:06:56 [DEBUG] train episode 751: reward = -55.89, steps = 1000\n",
      "18:07:01 [DEBUG] train episode 752: reward = -7.38, steps = 328\n",
      "18:07:06 [DEBUG] train episode 753: reward = -133.72, steps = 330\n",
      "18:07:22 [DEBUG] train episode 754: reward = 251.94, steps = 940\n",
      "18:07:38 [DEBUG] train episode 755: reward = 138.53, steps = 1000\n",
      "18:07:40 [DEBUG] train episode 756: reward = 11.25, steps = 119\n",
      "18:07:56 [DEBUG] train episode 757: reward = 151.37, steps = 1000\n",
      "18:08:04 [DEBUG] train episode 758: reward = 241.25, steps = 475\n",
      "18:08:21 [DEBUG] train episode 759: reward = -39.54, steps = 1000\n",
      "18:08:37 [DEBUG] train episode 760: reward = 144.65, steps = 1000\n",
      "18:08:49 [DEBUG] train episode 761: reward = 211.23, steps = 696\n",
      "18:09:06 [DEBUG] train episode 762: reward = 108.09, steps = 1000\n",
      "18:09:23 [DEBUG] train episode 763: reward = -86.33, steps = 1000\n",
      "18:09:34 [DEBUG] train episode 764: reward = -99.00, steps = 704\n",
      "18:09:45 [DEBUG] train episode 765: reward = 184.86, steps = 652\n",
      "18:09:53 [DEBUG] train episode 766: reward = 256.11, steps = 444\n",
      "18:09:56 [DEBUG] train episode 767: reward = 263.82, steps = 220\n",
      "18:10:13 [DEBUG] train episode 768: reward = 158.73, steps = 1000\n",
      "18:10:30 [DEBUG] train episode 769: reward = 79.91, steps = 1000\n",
      "18:10:48 [DEBUG] train episode 770: reward = 222.30, steps = 982\n",
      "18:10:52 [DEBUG] train episode 771: reward = 294.06, steps = 263\n",
      "18:11:11 [DEBUG] train episode 772: reward = 130.29, steps = 1000\n",
      "18:11:14 [DEBUG] train episode 773: reward = -66.40, steps = 179\n",
      "18:11:30 [DEBUG] train episode 774: reward = 102.93, steps = 1000\n",
      "18:11:48 [DEBUG] train episode 775: reward = 115.97, steps = 1000\n",
      "18:12:05 [DEBUG] train episode 776: reward = 107.86, steps = 1000\n",
      "18:12:19 [DEBUG] train episode 777: reward = 254.59, steps = 793\n",
      "18:12:35 [DEBUG] train episode 778: reward = 104.27, steps = 1000\n",
      "18:12:52 [DEBUG] train episode 779: reward = 158.35, steps = 1000\n",
      "18:12:59 [DEBUG] train episode 780: reward = 227.04, steps = 399\n",
      "18:13:16 [DEBUG] train episode 781: reward = 171.72, steps = 1000\n",
      "18:13:21 [DEBUG] train episode 782: reward = 252.99, steps = 329\n",
      "18:13:36 [DEBUG] train episode 783: reward = 261.01, steps = 894\n",
      "18:13:52 [DEBUG] train episode 784: reward = 255.66, steps = 925\n",
      "18:14:09 [DEBUG] train episode 785: reward = 133.93, steps = 1000\n",
      "18:14:26 [DEBUG] train episode 786: reward = 155.62, steps = 1000\n",
      "18:14:43 [DEBUG] train episode 787: reward = 141.15, steps = 1000\n",
      "18:14:49 [DEBUG] train episode 788: reward = 234.30, steps = 377\n",
      "18:14:54 [DEBUG] train episode 789: reward = 253.35, steps = 258\n",
      "18:15:12 [DEBUG] train episode 790: reward = 84.78, steps = 1000\n",
      "18:15:30 [DEBUG] train episode 791: reward = 117.51, steps = 1000\n",
      "18:15:47 [DEBUG] train episode 792: reward = 93.79, steps = 1000\n",
      "18:15:49 [DEBUG] train episode 793: reward = -185.16, steps = 129\n",
      "18:16:05 [DEBUG] train episode 794: reward = 162.98, steps = 1000\n",
      "18:16:09 [DEBUG] train episode 795: reward = 256.94, steps = 258\n",
      "18:16:25 [DEBUG] train episode 796: reward = 134.97, steps = 1000\n",
      "18:16:26 [DEBUG] train episode 797: reward = -231.05, steps = 79\n",
      "18:16:43 [DEBUG] train episode 798: reward = 151.98, steps = 1000\n",
      "18:16:59 [DEBUG] train episode 799: reward = 130.34, steps = 1000\n",
      "18:17:15 [DEBUG] train episode 800: reward = 154.68, steps = 1000\n",
      "18:17:19 [DEBUG] train episode 801: reward = -170.08, steps = 214\n",
      "18:17:35 [DEBUG] train episode 802: reward = 81.76, steps = 1000\n",
      "18:17:51 [DEBUG] train episode 803: reward = 140.37, steps = 1000\n",
      "18:17:58 [DEBUG] train episode 804: reward = 215.17, steps = 409\n",
      "18:18:01 [DEBUG] train episode 805: reward = 278.40, steps = 211\n",
      "18:18:18 [DEBUG] train episode 806: reward = 93.31, steps = 1000\n",
      "18:18:34 [DEBUG] train episode 807: reward = 166.68, steps = 1000\n",
      "18:18:50 [DEBUG] train episode 808: reward = 132.64, steps = 1000\n",
      "18:18:54 [DEBUG] train episode 809: reward = 240.51, steps = 272\n",
      "18:18:58 [DEBUG] train episode 810: reward = 251.70, steps = 217\n",
      "18:19:14 [DEBUG] train episode 811: reward = 127.53, steps = 1000\n",
      "18:19:31 [DEBUG] train episode 812: reward = 95.43, steps = 1000\n",
      "18:19:48 [DEBUG] train episode 813: reward = 135.73, steps = 1000\n",
      "18:19:58 [DEBUG] train episode 814: reward = 268.12, steps = 661\n",
      "18:20:15 [DEBUG] train episode 815: reward = 147.78, steps = 1000\n",
      "18:20:31 [DEBUG] train episode 816: reward = 170.47, steps = 1000\n",
      "18:20:48 [DEBUG] train episode 817: reward = 130.99, steps = 1000\n",
      "18:20:52 [DEBUG] train episode 818: reward = 262.83, steps = 274\n",
      "18:21:09 [DEBUG] train episode 819: reward = 110.39, steps = 1000\n",
      "18:21:15 [DEBUG] train episode 820: reward = 239.54, steps = 378\n",
      "18:21:31 [DEBUG] train episode 821: reward = 133.82, steps = 1000\n",
      "18:21:48 [DEBUG] train episode 822: reward = 137.69, steps = 1000\n",
      "18:22:05 [DEBUG] train episode 823: reward = 149.98, steps = 1000\n",
      "18:22:21 [DEBUG] train episode 824: reward = 174.66, steps = 1000\n",
      "18:22:23 [DEBUG] train episode 825: reward = 23.42, steps = 111\n",
      "18:22:29 [DEBUG] train episode 826: reward = 256.38, steps = 376\n",
      "18:22:43 [DEBUG] train episode 827: reward = 170.66, steps = 1000\n",
      "18:22:58 [DEBUG] train episode 828: reward = 144.07, steps = 1000\n",
      "18:23:12 [DEBUG] train episode 829: reward = 141.22, steps = 1000\n",
      "18:23:15 [DEBUG] train episode 830: reward = 268.09, steps = 214\n",
      "18:23:21 [DEBUG] train episode 831: reward = 197.00, steps = 489\n",
      "18:23:28 [DEBUG] train episode 832: reward = 248.00, steps = 492\n",
      "18:23:42 [DEBUG] train episode 833: reward = 125.99, steps = 1000\n",
      "18:23:56 [DEBUG] train episode 834: reward = 136.15, steps = 1000\n",
      "18:23:59 [DEBUG] train episode 835: reward = 269.43, steps = 251\n",
      "18:24:14 [DEBUG] train episode 836: reward = 125.02, steps = 1000\n",
      "18:24:28 [DEBUG] train episode 837: reward = 110.55, steps = 1000\n",
      "18:24:41 [DEBUG] train episode 838: reward = 129.61, steps = 1000\n",
      "18:24:44 [DEBUG] train episode 839: reward = 271.42, steps = 227\n",
      "18:24:46 [DEBUG] train episode 840: reward = 19.81, steps = 139\n",
      "18:25:00 [DEBUG] train episode 841: reward = 131.76, steps = 1000\n",
      "18:25:06 [DEBUG] train episode 842: reward = 294.54, steps = 406\n",
      "18:25:20 [DEBUG] train episode 843: reward = 104.78, steps = 1000\n",
      "18:25:34 [DEBUG] train episode 844: reward = 99.10, steps = 1000\n",
      "18:25:48 [DEBUG] train episode 845: reward = 115.60, steps = 1000\n",
      "18:26:02 [DEBUG] train episode 846: reward = 140.14, steps = 1000\n",
      "18:26:17 [DEBUG] train episode 847: reward = 145.72, steps = 1000\n",
      "18:26:31 [DEBUG] train episode 848: reward = 165.32, steps = 1000\n",
      "18:26:46 [DEBUG] train episode 849: reward = 132.30, steps = 1000\n",
      "18:27:01 [DEBUG] train episode 850: reward = 139.42, steps = 1000\n",
      "18:27:05 [DEBUG] train episode 851: reward = -866.85, steps = 260\n",
      "18:27:11 [DEBUG] train episode 852: reward = -1546.72, steps = 403\n",
      "18:27:16 [DEBUG] train episode 853: reward = -161.36, steps = 358\n",
      "18:27:21 [DEBUG] train episode 854: reward = 212.41, steps = 378\n",
      "18:27:22 [DEBUG] train episode 855: reward = -604.72, steps = 92\n",
      "18:27:30 [DEBUG] train episode 856: reward = 255.88, steps = 556\n",
      "18:27:32 [DEBUG] train episode 857: reward = 4.19, steps = 188\n",
      "18:27:34 [DEBUG] train episode 858: reward = 261.39, steps = 156\n",
      "18:27:36 [DEBUG] train episode 859: reward = -7.95, steps = 86\n",
      "18:27:44 [DEBUG] train episode 860: reward = 218.55, steps = 613\n",
      "18:27:47 [DEBUG] train episode 861: reward = 289.51, steps = 241\n",
      "18:27:50 [DEBUG] train episode 862: reward = 246.36, steps = 255\n",
      "18:27:51 [DEBUG] train episode 863: reward = 26.10, steps = 75\n",
      "18:28:05 [DEBUG] train episode 864: reward = 157.62, steps = 1000\n",
      "18:28:19 [DEBUG] train episode 865: reward = 115.23, steps = 1000\n",
      "18:28:22 [DEBUG] train episode 866: reward = -328.90, steps = 274\n",
      "18:28:26 [DEBUG] train episode 867: reward = 230.80, steps = 280\n",
      "18:28:32 [DEBUG] train episode 868: reward = 259.59, steps = 456\n",
      "18:28:36 [DEBUG] train episode 869: reward = -585.37, steps = 340\n",
      "18:28:38 [DEBUG] train episode 870: reward = 28.53, steps = 149\n",
      "18:28:41 [DEBUG] train episode 871: reward = 284.25, steps = 244\n",
      "18:28:55 [DEBUG] train episode 872: reward = 137.13, steps = 1000\n",
      "18:28:56 [DEBUG] train episode 873: reward = -52.76, steps = 75\n",
      "18:29:00 [DEBUG] train episode 874: reward = -44.45, steps = 263\n",
      "18:29:02 [DEBUG] train episode 875: reward = 53.58, steps = 148\n",
      "18:29:16 [DEBUG] train episode 876: reward = 113.73, steps = 1000\n",
      "18:29:18 [DEBUG] train episode 877: reward = -65.56, steps = 218\n",
      "18:29:19 [DEBUG] train episode 878: reward = -87.05, steps = 68\n",
      "18:29:24 [DEBUG] train episode 879: reward = 253.74, steps = 325\n",
      "18:29:30 [DEBUG] train episode 880: reward = -179.31, steps = 483\n",
      "18:29:33 [DEBUG] train episode 881: reward = 249.55, steps = 205\n",
      "18:29:35 [DEBUG] train episode 882: reward = -5.75, steps = 202\n",
      "18:29:40 [DEBUG] train episode 883: reward = 238.89, steps = 340\n",
      "18:29:46 [DEBUG] train episode 884: reward = 228.81, steps = 456\n",
      "18:29:47 [DEBUG] train episode 885: reward = 13.06, steps = 115\n",
      "18:29:56 [DEBUG] train episode 886: reward = 243.02, steps = 598\n",
      "18:30:01 [DEBUG] train episode 887: reward = 209.74, steps = 400\n",
      "18:30:04 [DEBUG] train episode 888: reward = 279.38, steps = 234\n",
      "18:30:18 [DEBUG] train episode 889: reward = 128.43, steps = 1000\n",
      "18:30:33 [DEBUG] train episode 890: reward = 85.24, steps = 1000\n",
      "18:30:48 [DEBUG] train episode 891: reward = 111.26, steps = 1000\n",
      "18:31:03 [DEBUG] train episode 892: reward = 157.65, steps = 1000\n",
      "18:31:11 [DEBUG] train episode 893: reward = -24.41, steps = 577\n",
      "18:31:19 [DEBUG] train episode 894: reward = 218.77, steps = 587\n",
      "18:31:23 [DEBUG] train episode 895: reward = 218.10, steps = 283\n",
      "18:31:37 [DEBUG] train episode 896: reward = 108.42, steps = 1000\n",
      "18:31:46 [DEBUG] train episode 897: reward = -63.21, steps = 600\n",
      "18:31:51 [DEBUG] train episode 898: reward = 192.27, steps = 346\n",
      "18:31:57 [DEBUG] train episode 899: reward = 248.43, steps = 459\n",
      "18:32:01 [DEBUG] train episode 900: reward = 269.86, steps = 267\n",
      "18:32:03 [DEBUG] train episode 901: reward = 63.01, steps = 141\n",
      "18:32:09 [DEBUG] train episode 902: reward = 250.85, steps = 484\n",
      "18:32:12 [DEBUG] train episode 903: reward = -231.04, steps = 230\n",
      "18:32:18 [DEBUG] train episode 904: reward = 231.72, steps = 440\n",
      "18:32:29 [DEBUG] train episode 905: reward = 225.02, steps = 811\n",
      "18:32:35 [DEBUG] train episode 906: reward = 253.08, steps = 421\n",
      "18:32:49 [DEBUG] train episode 907: reward = 130.34, steps = 1000\n",
      "18:32:59 [DEBUG] train episode 908: reward = 213.74, steps = 704\n",
      "18:33:14 [DEBUG] train episode 909: reward = 91.86, steps = 1000\n",
      "18:33:18 [DEBUG] train episode 910: reward = 295.56, steps = 299\n",
      "18:33:23 [DEBUG] train episode 911: reward = 196.36, steps = 395\n",
      "18:33:38 [DEBUG] train episode 912: reward = 164.99, steps = 1000\n",
      "18:33:52 [DEBUG] train episode 913: reward = 131.58, steps = 1000\n",
      "18:34:06 [DEBUG] train episode 914: reward = 135.81, steps = 1000\n",
      "18:34:20 [DEBUG] train episode 915: reward = 152.15, steps = 1000\n",
      "18:34:34 [DEBUG] train episode 916: reward = 183.03, steps = 1000\n",
      "18:34:48 [DEBUG] train episode 917: reward = 133.81, steps = 1000\n",
      "18:35:02 [DEBUG] train episode 918: reward = 130.18, steps = 1000\n",
      "18:35:09 [DEBUG] train episode 919: reward = 254.73, steps = 509\n",
      "18:35:23 [DEBUG] train episode 920: reward = 109.03, steps = 1000\n",
      "18:35:38 [DEBUG] train episode 921: reward = 166.15, steps = 1000\n",
      "18:35:52 [DEBUG] train episode 922: reward = 142.53, steps = 1000\n",
      "18:36:07 [DEBUG] train episode 923: reward = 142.92, steps = 1000\n",
      "18:36:22 [DEBUG] train episode 924: reward = 167.44, steps = 1000\n",
      "18:36:30 [DEBUG] train episode 925: reward = 188.15, steps = 505\n",
      "18:36:43 [DEBUG] train episode 926: reward = 215.21, steps = 874\n",
      "18:36:58 [DEBUG] train episode 927: reward = 143.08, steps = 1000\n",
      "18:37:13 [DEBUG] train episode 928: reward = 112.91, steps = 1000\n",
      "18:37:28 [DEBUG] train episode 929: reward = 81.78, steps = 1000\n",
      "18:37:42 [DEBUG] train episode 930: reward = 140.03, steps = 1000\n",
      "18:37:55 [DEBUG] train episode 931: reward = 163.42, steps = 1000\n",
      "18:38:09 [DEBUG] train episode 932: reward = 98.82, steps = 1000\n",
      "18:38:23 [DEBUG] train episode 933: reward = 120.41, steps = 1000\n",
      "18:38:37 [DEBUG] train episode 934: reward = 183.47, steps = 1000\n",
      "18:38:52 [DEBUG] train episode 935: reward = 137.67, steps = 1000\n",
      "18:38:59 [DEBUG] train episode 936: reward = 266.40, steps = 543\n",
      "18:39:03 [DEBUG] train episode 937: reward = 297.45, steps = 288\n",
      "18:39:07 [DEBUG] train episode 938: reward = 260.39, steps = 280\n",
      "18:39:15 [DEBUG] train episode 939: reward = 207.47, steps = 613\n",
      "18:39:17 [DEBUG] train episode 940: reward = -28.38, steps = 104\n",
      "18:39:22 [DEBUG] train episode 941: reward = 257.70, steps = 377\n",
      "18:39:28 [DEBUG] train episode 942: reward = 254.55, steps = 472\n",
      "18:39:42 [DEBUG] train episode 943: reward = 149.70, steps = 1000\n",
      "18:39:56 [DEBUG] train episode 944: reward = 145.37, steps = 1000\n",
      "18:40:01 [DEBUG] train episode 945: reward = 258.16, steps = 300\n",
      "18:40:03 [DEBUG] train episode 946: reward = 72.58, steps = 151\n",
      "18:40:11 [DEBUG] train episode 947: reward = 265.94, steps = 506\n",
      "18:40:14 [DEBUG] train episode 948: reward = 287.80, steps = 271\n",
      "18:40:19 [DEBUG] train episode 949: reward = 271.69, steps = 364\n",
      "18:40:21 [DEBUG] train episode 950: reward = 9.70, steps = 129\n",
      "18:40:35 [DEBUG] train episode 951: reward = 223.55, steps = 970\n",
      "18:40:41 [DEBUG] train episode 952: reward = 242.70, steps = 404\n",
      "18:40:56 [DEBUG] train episode 953: reward = 137.20, steps = 1000\n",
      "18:41:10 [DEBUG] train episode 954: reward = 131.76, steps = 1000\n",
      "18:41:24 [DEBUG] train episode 955: reward = 161.45, steps = 1000\n",
      "18:41:27 [DEBUG] train episode 956: reward = 284.07, steps = 204\n",
      "18:41:42 [DEBUG] train episode 957: reward = 145.33, steps = 1000\n",
      "18:41:58 [DEBUG] train episode 958: reward = 138.87, steps = 1000\n",
      "18:42:11 [DEBUG] train episode 959: reward = 240.89, steps = 874\n",
      "18:42:16 [DEBUG] train episode 960: reward = 247.68, steps = 373\n",
      "18:42:24 [DEBUG] train episode 961: reward = 275.73, steps = 607\n",
      "18:42:40 [DEBUG] train episode 962: reward = 147.51, steps = 1000\n",
      "18:42:56 [DEBUG] train episode 963: reward = 130.93, steps = 1000\n",
      "18:43:10 [DEBUG] train episode 964: reward = 143.72, steps = 1000\n",
      "18:43:13 [DEBUG] train episode 965: reward = 10.94, steps = 185\n",
      "18:43:16 [DEBUG] train episode 966: reward = 279.68, steps = 229\n",
      "18:43:30 [DEBUG] train episode 967: reward = 116.36, steps = 1000\n",
      "18:43:44 [DEBUG] train episode 968: reward = 116.62, steps = 1000\n",
      "18:43:59 [DEBUG] train episode 969: reward = 110.02, steps = 1000\n",
      "18:44:02 [DEBUG] train episode 970: reward = -42.73, steps = 198\n",
      "18:44:13 [DEBUG] train episode 971: reward = 270.28, steps = 778\n",
      "18:44:28 [DEBUG] train episode 972: reward = 143.24, steps = 1000\n",
      "18:44:41 [DEBUG] train episode 973: reward = 238.53, steps = 834\n",
      "18:44:43 [DEBUG] train episode 974: reward = 25.61, steps = 120\n",
      "18:44:58 [DEBUG] train episode 975: reward = 114.87, steps = 1000\n",
      "18:45:00 [DEBUG] train episode 976: reward = 246.41, steps = 182\n",
      "18:45:04 [DEBUG] train episode 977: reward = 272.81, steps = 274\n",
      "18:45:07 [DEBUG] train episode 978: reward = 284.30, steps = 156\n",
      "18:45:09 [DEBUG] train episode 979: reward = 275.49, steps = 181\n",
      "18:45:11 [DEBUG] train episode 980: reward = -188.34, steps = 128\n",
      "18:45:26 [DEBUG] train episode 981: reward = 148.86, steps = 1000\n",
      "18:45:34 [DEBUG] train episode 982: reward = 213.56, steps = 496\n",
      "18:45:49 [DEBUG] train episode 983: reward = 109.34, steps = 1000\n",
      "18:45:52 [DEBUG] train episode 984: reward = 234.93, steps = 235\n",
      "18:46:07 [DEBUG] train episode 985: reward = 130.47, steps = 1000\n",
      "18:46:11 [DEBUG] train episode 986: reward = 248.10, steps = 298\n",
      "18:46:13 [DEBUG] train episode 987: reward = 3.70, steps = 143\n",
      "18:46:14 [DEBUG] train episode 988: reward = 20.89, steps = 137\n",
      "18:46:18 [DEBUG] train episode 989: reward = 222.76, steps = 261\n",
      "18:46:24 [DEBUG] train episode 990: reward = 288.13, steps = 429\n",
      "18:46:28 [DEBUG] train episode 991: reward = 244.69, steps = 295\n",
      "18:46:32 [DEBUG] train episode 992: reward = 262.47, steps = 337\n",
      "18:46:35 [DEBUG] train episode 993: reward = 281.84, steps = 226\n",
      "18:46:39 [DEBUG] train episode 994: reward = 261.27, steps = 296\n",
      "18:46:53 [DEBUG] train episode 995: reward = 172.76, steps = 1000\n",
      "18:47:08 [DEBUG] train episode 996: reward = 129.99, steps = 1000\n",
      "18:47:22 [DEBUG] train episode 997: reward = 146.06, steps = 1000\n",
      "18:47:36 [DEBUG] train episode 998: reward = 145.55, steps = 1000\n",
      "18:47:50 [DEBUG] train episode 999: reward = 166.10, steps = 1000\n",
      "18:47:53 [DEBUG] train episode 1000: reward = 271.57, steps = 228\n",
      "18:47:55 [DEBUG] train episode 1001: reward = 56.52, steps = 134\n",
      "18:48:01 [DEBUG] train episode 1002: reward = 271.93, steps = 501\n",
      "18:48:05 [DEBUG] train episode 1003: reward = 285.02, steps = 295\n",
      "18:48:20 [DEBUG] train episode 1004: reward = 133.74, steps = 1000\n",
      "18:48:34 [DEBUG] train episode 1005: reward = 113.48, steps = 1000\n",
      "18:48:38 [DEBUG] train episode 1006: reward = 222.72, steps = 323\n",
      "18:48:45 [DEBUG] train episode 1007: reward = 279.40, steps = 535\n",
      "18:49:00 [DEBUG] train episode 1008: reward = 128.84, steps = 1000\n",
      "18:49:03 [DEBUG] train episode 1009: reward = 246.11, steps = 217\n",
      "18:49:17 [DEBUG] train episode 1010: reward = 132.16, steps = 1000\n",
      "18:49:20 [DEBUG] train episode 1011: reward = -199.85, steps = 209\n",
      "18:49:23 [DEBUG] train episode 1012: reward = 257.34, steps = 227\n",
      "18:49:28 [DEBUG] train episode 1013: reward = 278.25, steps = 371\n",
      "18:49:32 [DEBUG] train episode 1014: reward = 243.59, steps = 336\n",
      "18:49:34 [DEBUG] train episode 1015: reward = -37.41, steps = 83\n",
      "18:49:41 [DEBUG] train episode 1016: reward = 244.25, steps = 571\n",
      "18:49:56 [DEBUG] train episode 1017: reward = 171.17, steps = 1000\n",
      "18:49:57 [DEBUG] train episode 1018: reward = 24.91, steps = 110\n",
      "18:49:59 [DEBUG] train episode 1019: reward = 38.19, steps = 154\n",
      "18:50:14 [DEBUG] train episode 1020: reward = 103.89, steps = 1000\n",
      "18:50:18 [DEBUG] train episode 1021: reward = 279.42, steps = 298\n",
      "18:50:32 [DEBUG] train episode 1022: reward = 171.37, steps = 1000\n",
      "18:50:47 [DEBUG] train episode 1023: reward = 141.96, steps = 1000\n",
      "18:51:01 [DEBUG] train episode 1024: reward = 131.08, steps = 1000\n",
      "18:51:03 [DEBUG] train episode 1025: reward = 70.47, steps = 113\n",
      "18:51:07 [DEBUG] train episode 1026: reward = 244.15, steps = 326\n",
      "18:51:22 [DEBUG] train episode 1027: reward = 86.42, steps = 1000\n",
      "18:51:25 [DEBUG] train episode 1028: reward = 225.56, steps = 233\n",
      "18:51:27 [DEBUG] train episode 1029: reward = -229.27, steps = 117\n",
      "18:51:33 [DEBUG] train episode 1030: reward = 248.30, steps = 471\n",
      "18:51:47 [DEBUG] train episode 1031: reward = 141.96, steps = 1000\n",
      "18:52:02 [DEBUG] train episode 1032: reward = 158.02, steps = 1000\n",
      "18:52:16 [DEBUG] train episode 1033: reward = 126.78, steps = 1000\n",
      "18:52:17 [DEBUG] train episode 1034: reward = -54.74, steps = 58\n",
      "18:52:31 [DEBUG] train episode 1035: reward = 130.50, steps = 1000\n",
      "18:52:45 [DEBUG] train episode 1036: reward = 134.02, steps = 1000\n",
      "18:52:50 [DEBUG] train episode 1037: reward = 273.50, steps = 326\n",
      "18:53:05 [DEBUG] train episode 1038: reward = 113.90, steps = 1000\n",
      "18:53:10 [DEBUG] train episode 1039: reward = 231.13, steps = 383\n",
      "18:53:12 [DEBUG] train episode 1040: reward = -33.05, steps = 92\n",
      "18:53:14 [DEBUG] train episode 1041: reward = 19.81, steps = 172\n",
      "18:53:18 [DEBUG] train episode 1042: reward = 254.22, steps = 332\n",
      "18:53:19 [DEBUG] train episode 1043: reward = -31.29, steps = 75\n",
      "18:53:30 [DEBUG] train episode 1044: reward = 246.31, steps = 757\n",
      "18:53:37 [DEBUG] train episode 1045: reward = 210.33, steps = 495\n",
      "18:53:51 [DEBUG] train episode 1046: reward = 117.26, steps = 1000\n",
      "18:54:05 [DEBUG] train episode 1047: reward = 129.29, steps = 1000\n",
      "18:54:20 [DEBUG] train episode 1048: reward = 142.76, steps = 1000\n",
      "18:54:21 [DEBUG] train episode 1049: reward = -51.24, steps = 99\n",
      "18:54:26 [DEBUG] train episode 1050: reward = 254.33, steps = 338\n",
      "18:54:33 [DEBUG] train episode 1051: reward = 280.18, steps = 516\n",
      "18:54:35 [DEBUG] train episode 1052: reward = 221.16, steps = 188\n",
      "18:54:40 [DEBUG] train episode 1053: reward = 257.07, steps = 340\n",
      "18:54:42 [DEBUG] train episode 1054: reward = -42.04, steps = 129\n",
      "18:54:44 [DEBUG] train episode 1055: reward = 18.48, steps = 172\n",
      "18:54:45 [DEBUG] train episode 1056: reward = -77.88, steps = 76\n",
      "18:54:52 [DEBUG] train episode 1057: reward = 203.52, steps = 481\n",
      "18:54:53 [DEBUG] train episode 1058: reward = -45.13, steps = 78\n",
      "18:55:08 [DEBUG] train episode 1059: reward = 41.83, steps = 1000\n",
      "18:55:13 [DEBUG] train episode 1060: reward = 265.92, steps = 388\n",
      "18:55:17 [DEBUG] train episode 1061: reward = 239.85, steps = 258\n",
      "18:55:19 [DEBUG] train episode 1062: reward = -23.84, steps = 164\n",
      "18:55:23 [DEBUG] train episode 1063: reward = 197.61, steps = 281\n",
      "18:55:38 [DEBUG] train episode 1064: reward = 138.22, steps = 1000\n",
      "18:55:40 [DEBUG] train episode 1065: reward = -54.58, steps = 128\n",
      "18:55:42 [DEBUG] train episode 1066: reward = 17.46, steps = 159\n",
      "18:55:46 [DEBUG] train episode 1067: reward = 177.45, steps = 295\n",
      "18:56:00 [DEBUG] train episode 1068: reward = 87.30, steps = 1000\n",
      "18:56:02 [DEBUG] train episode 1069: reward = -35.31, steps = 108\n",
      "18:56:05 [DEBUG] train episode 1070: reward = 245.48, steps = 263\n",
      "18:56:07 [DEBUG] train episode 1071: reward = -54.59, steps = 108\n",
      "18:56:11 [DEBUG] train episode 1072: reward = 263.13, steps = 312\n",
      "18:56:14 [DEBUG] train episode 1073: reward = 253.31, steps = 236\n",
      "18:56:21 [DEBUG] train episode 1074: reward = 260.72, steps = 466\n",
      "18:56:35 [DEBUG] train episode 1075: reward = 133.72, steps = 1000\n",
      "18:56:39 [DEBUG] train episode 1076: reward = 262.66, steps = 251\n",
      "18:56:40 [DEBUG] train episode 1077: reward = -89.85, steps = 138\n",
      "18:56:42 [DEBUG] train episode 1078: reward = -450.04, steps = 117\n",
      "18:56:46 [DEBUG] train episode 1079: reward = 275.63, steps = 274\n",
      "18:56:48 [DEBUG] train episode 1080: reward = -65.11, steps = 136\n",
      "18:57:02 [DEBUG] train episode 1081: reward = 140.64, steps = 1000\n",
      "18:57:17 [DEBUG] train episode 1082: reward = 136.91, steps = 1000\n",
      "18:57:21 [DEBUG] train episode 1083: reward = 271.82, steps = 256\n",
      "18:57:24 [DEBUG] train episode 1084: reward = 283.92, steps = 272\n",
      "18:57:26 [DEBUG] train episode 1085: reward = -60.55, steps = 104\n",
      "18:57:41 [DEBUG] train episode 1086: reward = 106.55, steps = 1000\n",
      "18:57:45 [DEBUG] train episode 1087: reward = 237.74, steps = 255\n",
      "18:57:48 [DEBUG] train episode 1088: reward = 263.52, steps = 211\n",
      "18:57:51 [DEBUG] train episode 1089: reward = 232.83, steps = 199\n",
      "18:57:55 [DEBUG] train episode 1090: reward = 244.00, steps = 297\n",
      "18:57:59 [DEBUG] train episode 1091: reward = 279.19, steps = 262\n",
      "18:58:03 [DEBUG] train episode 1092: reward = 203.34, steps = 344\n",
      "18:58:08 [DEBUG] train episode 1093: reward = 287.05, steps = 295\n",
      "18:58:15 [DEBUG] train episode 1094: reward = 235.25, steps = 491\n",
      "18:58:19 [DEBUG] train episode 1095: reward = 250.99, steps = 247\n",
      "18:58:23 [DEBUG] train episode 1096: reward = 289.25, steps = 248\n",
      "18:58:23 [INFO] ==== test ====\n",
      "18:58:23 [DEBUG] test episode 0: reward = 279.16, steps = 244\n",
      "18:58:23 [DEBUG] test episode 1: reward = 238.80, steps = 277\n",
      "18:58:24 [DEBUG] test episode 2: reward = 256.58, steps = 407\n",
      "18:58:24 [DEBUG] test episode 3: reward = 260.11, steps = 247\n",
      "18:58:25 [DEBUG] test episode 4: reward = 253.51, steps = 295\n",
      "18:58:25 [DEBUG] test episode 5: reward = 244.07, steps = 277\n",
      "18:58:27 [DEBUG] test episode 6: reward = 107.44, steps = 1000\n",
      "18:58:28 [DEBUG] test episode 7: reward = 256.76, steps = 358\n",
      "18:58:28 [DEBUG] test episode 8: reward = 253.51, steps = 216\n",
      "18:58:28 [DEBUG] test episode 9: reward = 273.04, steps = 241\n",
      "18:58:28 [DEBUG] test episode 10: reward = 217.87, steps = 237\n",
      "18:58:30 [DEBUG] test episode 11: reward = 104.29, steps = 1000\n",
      "18:58:31 [DEBUG] test episode 12: reward = 215.63, steps = 226\n",
      "18:58:31 [DEBUG] test episode 13: reward = 260.16, steps = 264\n",
      "18:58:31 [DEBUG] test episode 14: reward = 266.09, steps = 357\n",
      "18:58:32 [DEBUG] test episode 15: reward = 257.40, steps = 334\n",
      "18:58:32 [DEBUG] test episode 16: reward = 246.24, steps = 337\n",
      "18:58:33 [DEBUG] test episode 17: reward = 257.08, steps = 353\n",
      "18:58:33 [DEBUG] test episode 18: reward = 255.00, steps = 274\n",
      "18:58:35 [DEBUG] test episode 19: reward = 151.24, steps = 1000\n",
      "18:58:36 [DEBUG] test episode 20: reward = 257.41, steps = 552\n",
      "18:58:38 [DEBUG] test episode 21: reward = 107.49, steps = 1000\n",
      "18:58:38 [DEBUG] test episode 22: reward = 225.74, steps = 224\n",
      "18:58:40 [DEBUG] test episode 23: reward = 141.09, steps = 1000\n",
      "18:58:42 [DEBUG] test episode 24: reward = 142.38, steps = 1000\n",
      "18:58:42 [DEBUG] test episode 25: reward = 238.57, steps = 257\n",
      "18:58:42 [DEBUG] test episode 26: reward = 262.03, steps = 258\n",
      "18:58:43 [DEBUG] test episode 27: reward = 253.45, steps = 206\n",
      "18:58:43 [DEBUG] test episode 28: reward = 241.28, steps = 345\n",
      "18:58:43 [DEBUG] test episode 29: reward = 221.80, steps = 244\n",
      "18:58:45 [DEBUG] test episode 30: reward = 158.97, steps = 1000\n",
      "18:58:46 [DEBUG] test episode 31: reward = 211.46, steps = 377\n",
      "18:58:46 [DEBUG] test episode 32: reward = 263.97, steps = 348\n",
      "18:58:47 [DEBUG] test episode 33: reward = 243.19, steps = 256\n",
      "18:58:47 [DEBUG] test episode 34: reward = 258.01, steps = 364\n",
      "18:58:47 [DEBUG] test episode 35: reward = 279.04, steps = 253\n",
      "18:58:48 [DEBUG] test episode 36: reward = 268.28, steps = 285\n",
      "18:58:48 [DEBUG] test episode 37: reward = 220.07, steps = 228\n",
      "18:58:48 [DEBUG] test episode 38: reward = 253.82, steps = 428\n",
      "18:58:49 [DEBUG] test episode 39: reward = 259.66, steps = 309\n",
      "18:58:51 [DEBUG] test episode 40: reward = 194.73, steps = 912\n",
      "18:58:51 [DEBUG] test episode 41: reward = 266.65, steps = 241\n",
      "18:58:52 [DEBUG] test episode 42: reward = 197.65, steps = 513\n",
      "18:58:52 [DEBUG] test episode 43: reward = 266.24, steps = 391\n",
      "18:58:53 [DEBUG] test episode 44: reward = 253.45, steps = 243\n",
      "18:58:53 [DEBUG] test episode 45: reward = 263.09, steps = 365\n",
      "18:58:55 [DEBUG] test episode 46: reward = 101.22, steps = 1000\n",
      "18:58:56 [DEBUG] test episode 47: reward = 258.37, steps = 471\n",
      "18:58:56 [DEBUG] test episode 48: reward = 252.55, steps = 256\n",
      "18:58:58 [DEBUG] test episode 49: reward = 168.53, steps = 894\n",
      "18:58:58 [DEBUG] test episode 50: reward = 253.71, steps = 439\n",
      "18:58:59 [DEBUG] test episode 51: reward = 232.88, steps = 207\n",
      "18:59:01 [DEBUG] test episode 52: reward = 210.76, steps = 908\n",
      "18:59:01 [DEBUG] test episode 53: reward = 263.08, steps = 343\n",
      "18:59:02 [DEBUG] test episode 54: reward = 224.02, steps = 315\n",
      "18:59:02 [DEBUG] test episode 55: reward = 232.40, steps = 311\n",
      "18:59:02 [DEBUG] test episode 56: reward = 256.63, steps = 305\n",
      "18:59:03 [DEBUG] test episode 57: reward = 248.26, steps = 372\n",
      "18:59:03 [DEBUG] test episode 58: reward = 273.76, steps = 386\n",
      "18:59:05 [DEBUG] test episode 59: reward = 135.52, steps = 1000\n",
      "18:59:07 [DEBUG] test episode 60: reward = 148.69, steps = 1000\n",
      "18:59:07 [DEBUG] test episode 61: reward = 251.73, steps = 266\n",
      "18:59:08 [DEBUG] test episode 62: reward = 251.96, steps = 180\n",
      "18:59:08 [DEBUG] test episode 63: reward = 246.56, steps = 313\n",
      "18:59:08 [DEBUG] test episode 64: reward = 254.47, steps = 326\n",
      "18:59:09 [DEBUG] test episode 65: reward = -2.94, steps = 246\n",
      "18:59:09 [DEBUG] test episode 66: reward = 257.42, steps = 354\n",
      "18:59:09 [DEBUG] test episode 67: reward = 256.20, steps = 336\n",
      "18:59:10 [DEBUG] test episode 68: reward = 246.34, steps = 293\n",
      "18:59:12 [DEBUG] test episode 69: reward = 118.35, steps = 1000\n",
      "18:59:12 [DEBUG] test episode 70: reward = 221.84, steps = 334\n",
      "18:59:13 [DEBUG] test episode 71: reward = 242.60, steps = 349\n",
      "18:59:13 [DEBUG] test episode 72: reward = 265.19, steps = 244\n",
      "18:59:13 [DEBUG] test episode 73: reward = 272.73, steps = 262\n",
      "18:59:14 [DEBUG] test episode 74: reward = 240.03, steps = 443\n",
      "18:59:14 [DEBUG] test episode 75: reward = 263.69, steps = 227\n",
      "18:59:15 [DEBUG] test episode 76: reward = 195.27, steps = 387\n",
      "18:59:15 [DEBUG] test episode 77: reward = 238.01, steps = 432\n",
      "18:59:16 [DEBUG] test episode 78: reward = 245.46, steps = 343\n",
      "18:59:16 [DEBUG] test episode 79: reward = 237.64, steps = 302\n",
      "18:59:16 [DEBUG] test episode 80: reward = 265.55, steps = 278\n",
      "18:59:17 [DEBUG] test episode 81: reward = 271.83, steps = 404\n",
      "18:59:17 [DEBUG] test episode 82: reward = 255.63, steps = 233\n",
      "18:59:17 [DEBUG] test episode 83: reward = 255.71, steps = 434\n",
      "18:59:18 [DEBUG] test episode 84: reward = 247.68, steps = 444\n",
      "18:59:18 [DEBUG] test episode 85: reward = 202.10, steps = 326\n",
      "18:59:19 [DEBUG] test episode 86: reward = 268.52, steps = 255\n",
      "18:59:19 [DEBUG] test episode 87: reward = 247.50, steps = 287\n",
      "18:59:19 [DEBUG] test episode 88: reward = 276.68, steps = 358\n",
      "18:59:21 [DEBUG] test episode 89: reward = 139.25, steps = 1000\n",
      "18:59:23 [DEBUG] test episode 90: reward = 94.53, steps = 1000\n",
      "18:59:23 [DEBUG] test episode 91: reward = 286.10, steps = 323\n",
      "18:59:23 [DEBUG] test episode 92: reward = 263.29, steps = 320\n",
      "18:59:25 [DEBUG] test episode 93: reward = 108.22, steps = 1000\n",
      "18:59:27 [DEBUG] test episode 94: reward = 146.27, steps = 1000\n",
      "18:59:27 [DEBUG] test episode 95: reward = 235.82, steps = 322\n",
      "18:59:29 [DEBUG] test episode 96: reward = 143.85, steps = 1000\n",
      "18:59:30 [DEBUG] test episode 97: reward = 241.33, steps = 455\n",
      "18:59:30 [DEBUG] test episode 98: reward = 287.49, steps = 258\n",
      "18:59:32 [DEBUG] test episode 99: reward = 140.57, steps = 1000\n",
      "18:59:32 [INFO] average episode reward = 225.44 ± 54.79\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD4CAYAAADo30HgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABBW0lEQVR4nO2deZgUxfnHv+/swQ3LsQgs4HLfArIgoOKFiqLiHTRGTUw8ookak58gIWoiXlGTqFGDN8Yz3gqCAioeXMshhwuywAIrIMu5y7XXvL8/pnumu6e7p3tmeq59P8+zz/ZUX1XdXfVWve9bbxEzQxAEQRBUfMnOgCAIgpBaiGAQBEEQdIhgEARBEHSIYBAEQRB0iGAQBEEQdGQnOwOx0q5dOy4sLEx2NgRBENKKZcuW7WbmfLN9aS8YCgsLUVxcnOxsCIIgpBVEtMVqn6iSBEEQBB0iGARBEAQdIhgEQRAEHSIYBEEQBB0iGARBEAQdIhgEQRAEHSIYBEEQBB0iGARBiCtHa+vjdq3qunocrK6Ly7UqqqpRUVVtuq+u3o83l25FXb0/LvdKd0QwCIIQN95aug19p87Glj2H4nK9CU9+g4F3z9Gl7TlYjUc/XY96v7u1ZIZPm4vh0+aa7nt18Vbc+c5qvLZkK5gZH6z8ETV11kKi3s/wK/ffvv8I5pX8FNx3sLrOcwGztGwvfvH8YnxTutuT64tgEIQo2L7/CG5+dTmO1MSvdxxvSndVoWx3fBpop3y8egcAYFOE+y4t24vyfYcjXm/dzqqwtHs/+h5PzC/FVxsqcOsbK1A4aaZu/7ySn7DmxwMucg38VHkUALD/cC3mlezCrW+sxBPzN6Bw0kzc/cEaVB6txb8/L8XhmsDoZeQD83DL68sBABc/9S2ue7kYj8/bAGbGwLvnYNh9c7F86z7MWbsTR2vrMWv1Dmz4KbwsKsu37sPrS7Za7j9aW4//fLkRhZNmYvPuQ7jsmYX4asNu/LjviKtyOsXTkBhE1AXADAAdAPgBTGfmfxHRPQB+A6BCOfQuZp6lnDMZwHUA6gH8npnnhF1YEJLM/bNKMHP1Dpw9sAMuGNwp2dkxZexjCwAAZQ+OtzzG72ds2XsY3do1i8s9a5Vedm6WfZ/zsmcWIieLsPqes7FuZxWGdMkL7lu3sxJ19Ywlm/eanltdFxDGR2vr8cHK7WH7r3s5ECKn7MHx2Lb3MFZu24/zI7yjemUlyywfYf+RWgDAhp8OAgBeXrgFLy8MRI94Z3k5Jp/TDxVV1Zi1eicAYKciVB777Adk+QgAcOBILS5+6lsAwK9P6obnvt4czBMAvLOsHA98UoLFd41Flo+Cx14xoitq6vzIySIQUTB/fafODm6f/8TXwe2WTXJsyxUtXsdKqgNwBzMvJ6IWAJYR0WfKvn8w8yPag4moP4CJAAYA6ARgLhH1ZubU7ZYJDRK10sZjadxD1XX44acqDO3aOuZrueWFbzbjvpkl+Ph3J2FgQatgut/P8PnI5sxwZq/ZgYWb9gAAcrJ8YGY8//VmnNqnPXq2b46aOj/8zGickwUAqK1n3P3BWrxZvA19O7TA/24cheaNsjHun19Z3qOu3o85awNqm32HayPm6YzHvkRNnT+iYIDyGst2HwqOZLKzwsu/qeIQfjPDOjbb3+esD0vboQgOLVPeX42jtX5U19WjaW6oGd53qAZD//YZJp/TFzec0gNPf7ERD81epztXa3Np5ZFg8FSVxMw7mHm5sl0FoARAgc0pEwC8wczVzLwZQCmAEV7mURC07Ko86qixV9vMeCyZPvWDNbjoqW+x44B7tYCqXohk8P1x/xFs2xuuulm5bT8AYLNG9fPhd9vR/a5ZeG3xVktj7e1vrsQvX1yCZxdswvb9gXzf+N/lwf2PfLoeZ/1jAe6bWYKxj32JW99YgTEPf46+U2frnu+bxdsABFRGry7eim6TZ9mWo+eUT4Lbk99dHdwu3VWF
0Q/Mw6aKg7rjVTvBvR+tDaaV7T6EPn/+RFdmv5Kn/y0rx+tLAnnyUWTB6ESV2Dg7K7hdV+/HzgMhQXHWPxZgkSJMAeDOd1YBCAhsAGFCwUhe0zQUDFqIqBDAUACLlaRbiGgVEb1ARGpXqQDANs1p5TARJER0PREVE1FxRUWFcbcgRMW6nZUYcf88/HeRPujkzgPhwkJtMvwxSobdB6vxqdIDLt110PSYIzX1qDxq3jtWVRQHjtj3nk98cD5OfvjzsPRGSqNVXecHM+PGV5ZhstI43fXeapz3xFc4XFOH6Qs24pCmp/reih/x+foKTJtVgtEPzg8Taks278UGTXk+WLk9qHKptjDqqqqbaPjrxyXYfuAoTn/0S9P9L35Tpst7dZ0f7634MZhWWx/+Hj/8LlxNZeScfy2IeMw7y8uD2z2nfIKRD8zD0drAMyjfdwQTpy8K7v/0+8C3sPtgDVaV74947bbNcyMeEw0JEQxE1BzAOwBuY+ZKAE8D6AFgCIAdAB5VDzU5PeyNMfN0Zi5i5qL8fNNw4oLgmo27Aj3IqR+sxdjHAg1MyY5KjHxgHmYs1AsLX1CVBDz26Xr0nfoJouH8J74OqgZ2VZr3zk975Ascd8+nYek3v7Y82KOPVj7lZgeagPdWlKN010HMXrsThzS94J8qq9H/L3Nw/6x1eOCTEttyOMWql61tQN3iRqWnHqltbOr80XkRle2JbECPhno/46VvyyIel9+8kSf391wwEFEOAkLhVWZ+FwCY+SdmrmdmP4BnEVIXlQPoojm9M4DIYlsQTKip8+NXLy3F99srHR3Pmj5I6a6DOO+Jr3DJ0wGj4MKNe1Dv55A6RmlV/Mx4fH5psAfohnU7K7FDo1awGn3sNNFRA8DMVTuC25VHa/H815uxbmd4We96b3VYmkojRTB8U7oHv7bRnQPAgSMBAXbARLe/+2CN7blaDsdxnoOK2aOL5M6q1RTV1sVBJxhvImSpSU6WzkAdTzwVDBTI9fMASpj5MU16R81hFwFYo2x/CGAiETUiom4AegFY4mUehcylZEcl5q/bhUnvrnJ0vLEdWfNjJQ4rvdvZa3di8L2f4uSHP8fWPYdBimRw25ys21mJqe+vwUffbQ8zskZSSz31RamlLeGsfyzA3z7+3tRw+9piazdIVTAAIY8iK5gZ2/cfweC/ho9e3BCti6/fpqE36/H3mmJhr1Ce8/7Dtbjnw7WorqtHbZQjBpUsl4Z6K3q2b+742EuG2ZlrY8Nrr6QTAfwCwGoiWqmk3QXgCiIagkC9KgNwAwAw81oiegvA9wh4NN0sHklCtLhttCOpI1SVz87Ko0Hjs9ubXPvCUuysPIpXFoUvnqW2e5+u3YlXFm3B/RcNQpc2TYP7H569Hkdq6nHHWX1s7/HKoi2odtgr1wqGSF5IDOvRixuinRn93NebLPfVmdgIrOSImq6qaooKW5ue74amuVmoOhr7DG2tHcdUsa4hHo4PVngqGJj5a5gXz9L1gJmnAZjmWaaEBkOlYpA9UlOPy59ZiPsvHmTbI3Na0er9HFRDODU+f/TddvTr2DKorupzTAusN0x4mleyC0vL9uLd5QGj6MkPfx42B6FkR2S12NT310Q8RiVXIxgiFoX1giRajkQhGPx+1nkRGalzMQu63lDQprlZUdsYVJrkxEcwaFWLew/Zq+fGDewQ8/2sSPs1nwXBiqtfCGghVe+Yh2avw7NXF1kef9ubKx1d188cND47bY9+9/oKAED7FgFj4YBOLcMEw1xNWAVrAvd1Gw7CyIHDtag4eBTayA2RhByD4yMYolAl1TPbCi43z8M4A5mI8EMMHlFAQLjEmy/Wm3tcNsr2Yf1958T9flokJIaQcZTsqMSoB+aFpds1LLUuYts88ElJ0L1R25ja6cBVdileRE5HGlYui+/G4MEDABc+9Q3GPrYA9ZqespM8OfHtj0Q0I4Z6v71gcDNimFuyS/e7fN8RS1dhpzTJTVwfOx7vIOI9
PL+DICSYp7/YqBuSq6g98i17DmHEtLn4eFXI4e3GV5Y5vv6aHyuDrpXa5sioorDDaUN2wZPf6H7PLfkJB47U4jWbuDpOUNUytZp8/GThLqvC7N5uY4Yab8gNJTsqbedqxBK0Lh7xhrwYMVgRL0O3HSIYhIwjUs93/c4q7Kqqxi2vrUB1XT2YGfPW7bI9xxLtiMGNYIjB2Dnj2zIUto0utlEgvlBoYpebBpU5PiFANu927/t/0VPfYvbanZb7Y1GtPfPlxqjPVfEqNIUZCRgwiI1ByDwiNRGHND3WXZXV+GpD9KGLte2Rlf3STMXkRvVh5NHPfoj63Hs/WhsM+QBYz0I2g8GObSp2PD5vQ+wXMRCru2mseBWawgxRJQlCFETq1R6qDum4s3yE91a419dn+8KD6FmpksyEQKxeMNFiFIJuDMGBEUO8cxQf6mN0N40VUSUJQooTqfHS+opHW8nUyJvaNr/SQgdupuZwY+yOJ+UGffr/ljkXiozYY0N5RW08hjIxkJUI/Y5CAuSCCAYh84jUeGkFQ7TD8hxfoOpo7zT6wfmmx5qNDsyCtqU60YwY/jVxCP50tv2EPCP3XzTI3U3gzsYwtGue42OvHV2IjfefG/G4LJ8Pp/dt7/i6sSCqJEGIgkiN12GN+iTa3pc6YnBijDVrtBK1trA672Bw51YRjnQCux4xFOQ1cb2Q0bFtm+JvEwa4OifSZDCVvKY5eO+3J5ruM1tc6O7z+zsaVfoIeOHa4WiS471KSQSDIESBXeexcNLMYKhqIDC5iSLFHjAhW2lEnLSTpoLBA9WH2SpsrZsGwjJ31oTWiJZotEiNsrNcq+t8RJg4oqv7mzlA+y7OMcwcbt8yPFJptEHqxh/XMfJBUSI2BkGIAlchmKPUmef4VBtDdCMGL1RJZj1eNeTFwE6xjxjmrdvlSmXz8xO6YkCnlkFDvRU+Cix5OaKwDYCAO2ZOlg8v/yq0RleXNk2iy7QB9XWtuucsPH7FUN2+aBrcxjmB52uUH3+bMDCq/DkhEeYMEQxCxpEIA6k6YjC2k599Hx7Wwmx04IXx2cwrSm1EckyWqbSjtYX75YR/f2Oabsa0iwbB5yPL0dH5gzvhrxMG4OPfnaxkNvDPWIzCtk1x/cndHd/XjlvP6AUAaNk4BzlZvqAwmHPbmKiu17xRwOPfOLLw0hgtIwZBiAI3YqF4yz4sKTNfdN6OoI3BcDez9YDNZvp6YWMwu6bahOS6jHF0+fAukQ9yyDEtG5umP3HFUFw9qhD9O7UEoF0uVQk3ogiUwnbNgoI4FsoeHI/fjNELmBtPCfzu1q5ZFApFa1WTz8OWNREeUCIYhIzDjfp+xsKyqO4RmscQeWh/86srwtK8UCWZXVE1VGa7bKmibXx+f3rP8Gv5CDee0iPiucFV8ZTf6kgji9xbgSKpr1T+eFYfbLz/XNeCMxJeGohFlSQIUeDGbhDtojGhpT05YiNgjKIKBFxYC9vGbhBWue6kbnjOLHJslKqkaBufY6MM1QFAE7E28P5Ue4bPR67z47RhJqKgakbt/UejVjLezez2/7txlOvrmpEIVZKExLChtt6Pmjo/mjWSx5ROuDEx5ESpoiDSjBiiOL+unuHLdX4mkX25pp7X3/w85X+8e8RWWA1MnLTToTUu1P+hEYNrYjjFrN2N9PzVk1+4djhmLCwzdVvtbuI1Fg3irppkrnu5GAPunpPsbAguMer97YhWMKi89G1ZWEVVQ2XvP1xjaUvYc6jGVZyiey+w9uu3m0Cm5s1tOaNx4dXeLxbUEV9QlZQVnUtxtJjZDZyWa1SPtnj6qmGm12gSp7AZiRAM0hW2YcEP5gtlCPHhaG09Ko/Won0Lc+OkW6qO1qKiqtoymJ0Z63ZGXhHNjj0mE6v+V1yOmat24D8LrJeiBIAf9zsP91xjI0Ts1EQhr6RwwXDTqT3w9Bf2kUV7tm+OH/cdcbyGQiyNltHG4NfYGNzKhXg3nVlEqLfpcDgRXPGa
/OalYTt4D+9v4Q4iGkdE64molIgmJTs/TjhUXYcdB2KP6d4QuPLZRXho9jpsqjiIP7y1EiOmzTNdA/jJ+RvwqRJmufJoLQ7X1GH+up/w6Kfrg8e8smiLboGVnz+3GKc/+iUWbtrjOD+7DzqbMWvEbonNeuaIQsEtdqMLu8ZYbbDMhEffDi0i3veCwZ1wRj/noR6s8uKkoVa9k9o1C0w0C6qSfO7HC1HJJ5tzrK7nRm0Z7WQ5I4nwSkqpEQMRZQH4N4AzAZQDWEpEHzLz917dc+32A/hifQVuPi3cm8Iplz6zECU7KsPW520ILC3biy/XV+CPDuPhfLtxD77duEfXU+07dXbgWlPGIl9Z+vKRTwOhpRffdQZO/fsXyM32BRdqmb5gE1785XBMfX8NGmX7cOvYXrhgcCesKj8Qz6JFDTPj1D75lkszRoOdYLBrcNRdZg222YQ4Mx69fDBuP7M3znj0y4jHxqKZu+PM3jijb3sMUsJ3nDuoI77duAd3juuLL12O3mNTPYW39o1zsoLvYFBBKwztmocZC7dg98HA4kZtmtmH3V5z79kx5EdPvASMHak2YhgBoJSZNzFzDYA3AEzw8oYTnvwGf5+zPqYFSJws0J6pXPbMQjz5eWlcFnAZPm0uvt9eqVu/4IT75+FIbb1u9a7qOj+ufHZxcPvh2etxzr++ivn+8aKmjnEwDgvD66/px4hubXRp/73uBJx3XEdcXtTZ8jy7RsRuboD2tEbZWeiR39xRPq3u56Qty87yoagwVMbGOVl45LLBwc6CUz64+cSoRgzqKczAtIsG4qqRobAc/7txFMb0zgegeEkp6apw7dexpe21m8fRgaUhTnArALBN87tcSdNBRNcTUTERFVdUxNYrUw1cNUkKg5zObKoIqXGq6/x46otSFE6aGWZwXVq2FzsOHEHVUeulGVXOffwrzI9iNbWqODfERtx49ew/XGNqe3DC3y89DgAwqnvbYNqFQzrhhjHd8dYNenfHbvnN8OSVx6NFY+veqtqEmDWU2Qb10rBjW4cd41bee2UYdXPVwV3yoruHJu8/P+FY3HdhKMpr72Na4I4zewd+cMi94fYze+PJK4fipJ7torpnNDTEsNtmRQ77NJl5OjMXMXNRfn5+TDdUda92xr2GxKJNe3D7myuDI4AFP1TgiXkbUF1XrzPG19T5cbpGtXCkpj64MpdR7XHZMwsx6oH5OPOxBY7y8GuT2cPJ5OPfnYSv7zzN8fHz1u0KrqmsYtWjHGvQ3xfkNUHZg+Px+vUj0bJxoJf5z4lD0bpZbti5ThoIta0zU63kaKyYU8/rrzOORtv2WA1CYvUqMsqbv0aIvuqFgFKvqQ090jQ3C+cd1ykh6h2VhqhKKgegnYvfGcB2i2PjguqtYTcTNR5qklTiaG09Pvv+J9w/qwRAoHzLtuwFM2Pi9EV4b8WPqDxah6O19bj6hSV49LMf0OfPs3H1C0vwq5eWgpkx4v65umserq0PVn5VyB6uqcOuqqPBY3ZWHoUXRNtDdEr/ji1j8px65qph+PP4fqb7nrtmuO53Y41L49w7TrGdbOXECNlBCUehBnvToh0xXHdSN9Nj3Lj+ArGpkuyvG9rOzfJhdI+2psd1zw/MFTCLNOsUqxKrctTvD42kEigPgjTEkBhLAfQiom5ElAtgIoAPvbyhKhjsRgyxLDSebAonzcQDigAAgG9Kd6Pv1Nn4zYxiTF+wCYeq6/BW8TZc8vRCvK1ZzeupL0qDRmEt89ftQrfJs7D/sF4t9MS8DcFKMm1WCQonzUT/v8zBiGnzvCmYBrVn7RW+GMfuwwtb48Se7SyFwz3nhyanaXvt7Vs0Rh8bzyEn+Xr08sF4+JLjTEcsRk+lxnFwp/ROlRS6LlHkXvNLvxxuu9+MKef2Q7vmuehqEaJcOzPbrcCMJw3OxsDMdQBuATAHQAmAt5h5rZf3NBMM0xdsxKuLtwR/W63lm+pU1wXcQLWuk1c9v1h3zIC75wTX
Af7T26uC6f/50p275RtLtwWr7tsulou04uReznW2jbJDDdrqe87C75UImolgSITRSl7THLRqErABWH1G157YLbjd1kRlZIWTRjivaS4uH97FtDExnh8PP3ur3mw8mzIn0xraNm/kWu9/Wt/2KP7zmZYCUjUgH6sJZeKkXKrR2sjtY3vj9d+MxL8mDnGVz1g7Ko7u4fkdXMLMs5i5NzP3YOZpXt8vV7Ux1Id86e+ftQ5T3lujyZPXufAG7aL3L36zGcu27DMti3Ed4Ghx21s0LpTyx7N64/zBnTD/jlPwwrXD8c2k08POyfIRlk4ZG/x90dAC3HNBqMfdonEOzh3UIew8OwrymuC7v5yF9282X9nLDrtF4K8dXYiVfzlLE6I78ofUtrlzDxyzRnjuH8aYNjRmr8YoLHSjCk3Ij0i8c9NoFOQF1kvwqs3S5p9Ajr61eA9eurRpiheuLcIjlw121SbM0KwroaWgdROM6tE2TO3VLMIM6UQYn1NqHkMyUCutnZ+4U1USMyfUCGWVh7XbKzGwoJVucfp7P7KeCmI2wSwaqqrdeQb5fIRsJV7/wsmno2Mr/WIseU3CvW2W//lMtNKsFfCPnw0JO8atofPrO08DEWFI0zxX5wGhpTPNMKpq1M/ozP7HhK3b0LdDC6zbWeVKTWA2A7Zn+xbokd8ct76xUn+sgzAPvzyxEH/92P2UoWxfeCC6SMRSTQKqpMjHNcuNf/N2et9jwjMTBZcXdcZ5yipvx3XOw9s3jkLxln148JN1+Nnwrhhe2BpVR+uwafchHK2tx0vflgXPjeQaGw8avGBQK29tPQdVL0aMqqQVW/fhzaXb8MDFg3QVwc+AyyCWUfH5+l04WlOPxZv3Yup5+jVpX/q2DPd+9D0uL+qMt4qdqXSM9pXzjuuIj1ftMD12QKeWWLs9PvM2sohw5QldMWPhFjQ1qcTGRvfEnm2DQuHDW07U+YZ/O+l0VCrusFZta17TnDDbyCm98y0bs4cuGRQxgKJdQ26cJ6COGHq2bx4mGN6+aTQOuxWsloZeZ7F+jGna84I+/Q7ywQi1j1k+wtCueWhvnHtgvJeD69rlTZv3QQWtsPrH8MmN9188CL2OaY4n5pe6vFtk1OcSbXV/+NLBut9FhW2wRilDvd+PcwaFlgZ9fcnW4PbbN46KqL6MByIYNDaGYX+ba2qE9htGDBc//S2YgT+f11/XOPmZkeVxsK8Dh2vxyxeXBn/3bN8cAzq1xNCuAR/01crsX6dCAQiP99O5dUCHetXIrti29wj6dGiB6Qs2oUd+M3x0y0noftesmMrQ55gWWP9ToHd89/kDcNOpPYJ6eC3GhlXr239c5zzdvk55TdAJgRGHVSeuWW52mGCwo3t+cwzXTLgyw66HnGMQGqp3m5ksad4o2/UkKFejCweqJC1uOsL1fr8uMul7v3WvkouEXXb0aqYQbZrl4o6z+ngiGLxgYEFgxvfxhvkk6msaP6ijbgKgl4hg0AiGgxY9NqMqyUq/WO9nxClOlilVR2txwb+/1qX9+f2ALaTswfFYv7MKxVv2RbzO+YM74dvS3UGBoJ1VDACXHF+AMb3bYVT3tsGG76oTjkXrZjk6w9dd5/ZF45wsvPD1ZpTtOey4HNed1A3/984qEAUaJ6MKKVbMGuv+HVuibfPcsMB1dg2gk3bX7pjwEYN6DuFfE4fojObR4KbxNh9FxHT7IPX+kAOAU9V7rCrXRBhg7fDCXbWosA0WTT4DHVrpXaODId4T6AmVcsbnRJNjYnw2YmViMBoT/cx44JMSvLJoCz78LjT94t+fl+IVi5XCauv9KJw0Mzg5zIyD1XWYV/ITPlm9E1ssGuA7316Fs/+5AFv3Rm6g//WzIVg29UzL/U1yszC6Rztd5e3atmnYDNtT+7TH1aMKXS+7qH7gTvyx/3R2HxSZzMi1w+yqV57Q1bXXjZPGy16VZLQxcDB/E4YUYNxAd0bysHsnsXF9/IqhOE6JaVTvZ4zu
GRjNHbZY+CjW9tNYVOP1Pv7dSTHewS3eNNJGoQBoF4Xy5JamNPgRg9qobfjpoOUxw6fNxVs3jAqLVcMGrdOmikM6N88LBncCAPx9TiAi6C9GFYZd+5AySnl2wSZTN8vaej+ue2kpFm/ei9vH9rbM45vF2yz3aRnbr33EBsGpiiLayq7tOUfi5tN64nBNnaORkIrZdRnu/fSdlM+uDDkG63Colxmfbmas/uyOBItFa9SvQwu0UOaP1PsZk8/ph4GdWuFkCxfRmCe4Gd6Gz2BzMJuclwgSsU5EaC1sz28VpMELBjUI1r8/t9dDvrl0W5hgMI4YJr+72vX9VfVVVXUd6v2MLB9h657DuPXNFXjhmuF45NP1WLw5sFh9bZTxnP46YQB6tm+OYce2drT2r9OeqNlhX995Gk566HP785T/jRxWZrcVwixfzOy68XAiuNyMGEI2hvg0JjGrYxyE67a+N5ClfEt1fj9ys324ZJhNMD/D9dwbn+1/J5pENtK+JKiSGrxgUFVJlRGCsKn1f5tGVWMUDGaeEVqWbdmL77YdwK9O6hZM0841eHd5OXof0wK3vrECZXsO4/mvN+PVxSGPhCdthFfn1k0s5yNcbTJSscO5iiH8uNZNI0/QunBoAdbtrLIdAcWC6YiBoxgxOHgMdkLU2sbgKhumfGsyx8MtThZ8sWqKiCjogedkfkasGB+Z7rEnUUok4tbqPWTEkECcLnmo9gzPeOzLYJrbGdGXPL0QAIKCod4fsEmobNh1UDf72E4QGHn9NyNx8sP2PXWnOB0xmDVwTp5n45ws3GOzVKWReFQ+P7NrG4OzSVQ2gsHwgII2hhjK8/aNo7Cp4hA65cVusPcRYfygjhhlEncoUh4JAVffz9dXWIaQsD0/zqqlRBNUCybgXsmYGyWCwWE4ZfXdaN1ZnciFRSariRVOmokf7jsHJTsqdYu5TI9y1a8OLRtbNhTRLEDudMRg9sHaLTMZLa5DP5vknxlo5IHLmJ0cNApYfxxsDEWFbeLmspjlI/z758dHda6PCNeMLsT44zo5Wi8h3Hjs7hkYz0+yU1JiPYTUEUPC7iiCIczX3AqzyuxkCD3lPXO7w/7DNbplKWOhTrFN9GrfHBt2HcSVJ3TFTaf0QMXB6qgCzMVifE72zG/AOna7FyMG22dl2KXaGBIRBM0Jsc8+JteL6MRwR8P90/8ZOiUZXknirupQlWRWl1/+dgu22vjv//LFJdhYcch03zUvLsUd//vO0b3vHNdXt5qUkXp/YBTz4CXHoU2zXFw2rDO6tGmK47u2Rs/21tE53/3taNN0t8Znr8OSu6185jYGxoBO7kIJONHB2y6rafjtt5ng5gXGWFRGnLxn7asdf1xoNm7MBnSXp6fciCGhxmf1njKPIWFoPUc6mfgQq5hVhGe+3Igxf7fW639us+av1XKgV4zoikcuGxwMSgYEQlRMGBK2kF0QdQLesGNbY/nUM4OzoCNxfNfW+JPJWs1OGkTAWh0Q715UfLySAlEu779oUPhOq+s4aL3sGijjNxNUJSVAM1324Hg8fdUw22NsZz6bpP37yujUTmbX++WJhVFfK3A9vbtqskjEuwxNcEscDV4waOlrE5zKqzjzAPDm9SOD23ed2xeXDuuM138TSuvSpqltJb7jrPDG3Sk3n9YzLC0Wd1UA+Oz2U6LOTzwwFQwIxSmyQxvjx0mv1O5ZGXedr8xrOa1vbKsOxgsn6hgrXbrbyXHa40d0a4NJ4/q6Oj/MK8nQcuUrCyldMcJ6ZB1PEtpIB1VJMmJIGIoWBh/dclLEeCzrd1ZFdY/CtvZeG/00Kg41Xk67Fnq3T6OHi0rZg+NxzejCqPJlRbQ68LH9ApEne7ZvHtYAq/sSgVkvzspV1Hjkkiljg3YZJ/LRrnE1diaGdMlD2YPjbdV7iSQWW4fbU68dXRisBz5ybyMwHm88u1WTHJQ9OB6/Prm7u4xFiSrkhxU6G53HQjKMzw1eMDAYHVo2xqDO
rWwbgq827MbZ/3S2ZrGRU/u0t92vnSGrVoBcg+0jkQZLp5VW7QWqx195QmhVVmN2n74qejWE63xpDstTorEO7NRKuYb+WLPKFoycGeF+RBEmiaWGfTSIUW1o9kk5NdC7VaE0a5TtSo0Xfr8QDG9H8E44pXc+yh4cjx759iPQeCDzGJIAs7YCW39ssXgQGeOftG2Wi34dW+Lr0t2Y+fuTwmbIAuGTo7SC4aqRXfHfRVuNpyScsOG9JsVYcVUj/5n9ox85OB1Kaxv0s/ofgz+e3UezZrPzBiXSkYTUa/ztCDfghmd+xV/OBDPw/Nf2rtOJLrfdzOdUfQfLp56JuiijFWhJxpwNEQwINQBefWAdNYLh49+dhG7tmuni/Dtp8LSqpEQsBu6EsGxofquNzrBjW+MZxQi6dMpYtGzi/Sdn7AmHhEJkVRKA4JAhUq+UiFwZn5ONsYExG4UaZ4dbfZpRlS2OjyPVnq0ZbVws02pLJqmSiOjvRLSOiFYR0XtElKekFxLRESJaqfw9ozlnGBGtJqJSInqcEuCsHBgxKCoRj+5xTMtQwzSwoFXY4i92xVSNoVkadVOWU7chj7GLf6Nm8ZiWjYK+7vktGsUUajqaz8GYR7WytrCZ36FWwIiCIcIxqdZ2GeWAfd7tMx+NalON02W2KFMkdCME4z7XV0sv1PIl0vjsZfftMwCTmbmOiB4CMBnAncq+jcw8xOScpwFcD2ARgFkAxgH4xMM8JmQGY4eW1m6wdsy5bQzaNQ80ZNoRwwVDOuGFbzbHJW9AIMxCs0bZOOdfX7k6L3x4H65KSkbPThsm3Xj7Y9s2w/s3n4iNuw5GnEcSMSxEBCNqqvVqo5kLYFU7ojF5DS9sjdvH9sbPbebkWBHWCXFx/9d+fUJwdb9kM++OU1wvpZtRITGY+VPNz0UALrU7nog6AmjJzAuV3zMAXAiPBQM0NgYvnv8r141wFFjOjD4dQt4r2h5a/ziv+RptiAVjT0bbWKiNYjJm+UbqWQ3pkqcLhmh1fuR4QfaqpNQSC2aNawxeSVG8VyLCrWPDQ8s7Ozm06db4PNoiFHgyiMZYHapn8c2LHYnSSfwK+ga+GxGtIKIviehkJa0AgHY9ynIlLQwiup6IiomouKLCehKZE7Tr1cbDyGNsCEd2b2tqXHaLOmJolO1DrsP4Tp4Tpq/XjhgC/5NhD9HF6o/h9hEbTrK/fooNGOKan0S/Vzv1UaqEx/CKoFdSuoTdJqK5AMzm3U9h5g+UY6YAqAPwqrJvB4CuzLyHiIYBeJ+IBsC8g2X6JJh5OoDpAFBUVBTT02LmYIMWj+8rN8uHI/7QUDEny+coptLFQwsw0iTKpYoqcLzsgf/fuD74ZPVOx8fbDe/VxjkeQtEtrZvlYmjXPKzYuh9W/Xa7dx2yMUS+Vyx6+nQm2WqyZN8/kaj1LG3cVZl5rN1+IroGwHkAzmBlfM7M1QCqle1lRLQRQG8ERgjalT46A9gOj/FzqAFQv7VfjDwWryzaEtX11J79dSd1C8aqMa7kZcZjPxsS4brejxJ+e2pP/PbU8JnQVth5+KgV12ksqnhz6bDOimAwx8noMOJiNbBv/FOt6YpGUFl6JSX4tWrzHnjuib1/KpARqiQiGoeAsfkCZj6sSc8noixluzuAXgA2MfMOAFVENFLxRroawAde5U8loEpSvZJi75VXKSuydcprEtTdx2Ph8hRxRNIR1tBoRwxKfpMlGGKpRBx0V7U/LjDBzW5/arVebnITKevJVCVp62xDIDTzOTNCYjwJoAWAzwxuqWMArCKi7wC8DeBGZt6r7LsJwHMASgFshNeGZ6iqJAVlI5Zhaj8bw/Alx1svfRiJVKwI4XrfcK8kL9ZncIPVY7NXJanG50gjBsKRGmsPk2RHADUSzSdk1Rgl2qkgBT//xKEKhnRRJdnBzKY6CWZ+B8A7FvuKAQz0Kk+m9wTCWjgrvfgVI7rg9SXbLK+16f5zMW1WCUp2VIZ5xvxw3zmW
8Y6c0FgxON91br+orxFv7GajJluVFI865MRddWOF9Yz4VGvM4pmdRHdU7NR6KfaY407QxpDAezb4mc9gzcxn5b9VbyiSq5nPF/p8jdI9Vk+i7Cwfyh4cH9M14o3dBDe13UiWYFBfgFWjYdeYhFRJ5keN7dcec0t2RbYxpJhkcJOftspEwHbNErUQj2BF8LVlwoghXWBwyMag+t5bVCAnPf5kuJYljbARQyhBbVzjoUqK5UlG7vW774n2PqZFQDAQ2YfddpC/ROJGTl02rAuyfT5MGNLJuwy5IMVkbEIJyYXMsDGkBczhFdjKWJxl0vt98drhltfNdEJCMID2sakfcdJGDBEIRawMf1GRQmKoyX5mW6eAlBsxuDjW5yNcMqxzWDDHZGGX9xR7zHEnuFBPJnglpQv66KoBrHqBZulGIRJcVCM+2bPkiz+eitm3nRz5QA8xNpzan2r45ngIhmjqfeTn7+CqVoZrZcfhmvoIITEi3yKhpHMLapL1V399grIrjcvlgGS8NlElgcN15RYvwqyNC6v8CfIgKGzXzNsbOCD8MYVSmiiB0nLiMEs7mkepPv+IcxHMXnYEd1Vduk3mUq3BSq3cxE4szhzpiBifE4h2xMARDJZdWgdWoOrboQXWKau5+Yjw0CWD0FnZpzYGTmY7pzuhUCL63wDQVBkxJLvueuKuqkmv91u/52SX3Uh6DxjSOPMxEnJoERtDwvBzeANgVYF6tm+OBX86DTee0kN37M+Gd8WJSqCuVGsMvMTOK0kNa72rsjoO90kOVu9Sm1xvV1lT7FtI58Y1nYVarBhteYmgwQsGgMOqi1VPMSfLh65tm9qOBuyMmpmGXdjtn488Fo2yfTitr/2ypk6I5kmqc1GMS6SqxOKuqn3/fpsRQ6o1xOncaTHLeubXMBUxPiccM+MzEFAXqQt+q6iNzbiBmriBhpeVjIBXqYL2MXZr1wzr7zsHQ7rkJSUvlw3rghtO6Y7bzuwd92vX1IWWa7QdMaQY6dzrtvXwSuNyOUFGDElAG3ZbhQiYfdsYXD3qWF266mHTNDcbo7q3DZ5vdd1Mx27mc7LJzfZh8jn90LyRuRnNrqGJ5K5arRUMdiOGFHoeQOqNYARnBN+a2BgShzbstkowDLfhWK0XREhlpD/GZ5GeiYS5q2ZYw2PVsGsFQ1o5GaTx69G+i3R65PEgOI8hgfcUwQDriVph8xucKGmpAXklKf/VkqZaD9kOexuDvXdadV0ocJ7diCHVUMvTumkOlkw5I6l5cYupjSF9Hn1MhLySEndPEQzamc+q77ull6LJiMEgx42NZSbj1Jsr3fjFyIAK0UqVpI2o6vebHpKSqO9rZPe2aN8iunXIk4X2VdhEe89IkhFmRwQDEPalhVQi1p+clZGZkiHek0T43L7MqKJ3nz8AP9x3jmVolMMawTDKZtW9VCO930565z4WkuHQIoKBzdxV9f/NsPIUaNc8EI2ytRKdMpNJZeNzLPh8ZBsN93BNXXD792f0wtd3noaCvCZhx6Xa47Cyi6Ub6Z5/tyTjvTXYmc+7D1bj3eXl2Lz7ENq3MA8t7KRiG+crXDGiK5rkZOHCoQVxyGVqkwmqpGiyrB0xZPkoOOs91UnH96NiHrmkYUiITkqn45Jh0S/05ZYGKxjeXlaOBz9ZBwA4pqVe3+okKqaVp0CWEpWyIZIpqqRIHLZZtS2Vicf7eeuGUdh3uCYOuXGHac4j2AQzhTbNcrHx/nMTOkGxwaqStJFSw3Xlyn8nsfYbRqfFEZleQVXuvWAA+nZogR/uO8f2uJQLux2H7Izo1gZnD+gQ+cA4Y+b4EfzdADokWT5K6PfkmWAgonuI6EdlveeVRHSuZt9kIiolovVEdLYmfRgRrVb2PU4ePoksszkJweBpSrrm+NX3nOVVVjKGdKye0cj1Mb3zMfu2MTGvyic4Jx2/rXTG6y/7H8w8RPmbBQBE1B/ARAADAIwD8BQRZSnHPw3gegC9lL9xXmVMJxgs
gsFpxVKLxjn6Y5LgQpbqpFgHWTCgut+m+xybNM9+WpCMLs8EAG8wczUzbwZQCmAEEXUE0JKZF3LAojsDwIVeZULnimgTDM6KSef0xaCCVhjRLX3cFT0jWFG9kQxq+JGiY1vH/drxynE6BE1M1CJSXiCdjsTitWC4hYhWEdELRKTW6gIA2zTHlCtpBcq2MT0MIrqeiIqJqLiioiKqjNnaGIKqJOuvsW+HlvjodydZxuIR4seY3vn4/q9n44TuIoRjQQ00mw5CzIhZXUy/UqQPMQkGIppLRGtM/iYgoBbqAWAIgB0AHlVPM7kU26SHJzJPZ+YiZi7Kz8+PKu/aaMzqTYKrfimSQXopDknAc2qam9oC2GyUmWrfTzLWDo4XtjOfU+w5ZwIx1TZmHuvkOCJ6FsDHys9yAF00uzsD2K6kdzZJ9wRtuIMjBvdD+c4ynzRsG2MmS2wMgkO89ErqqPl5EYA1yvaHACYSUSMi6oaAkXkJM+8AUEVEIxVvpKsBfOBV/rSC4VB1nW6f9ECEeJBqn5FPqe1pFPdPSBJejs8fJqIhCHTOygDcAADMvJaI3gLwPYA6ADczs9plvwnASwCaAPhE+fMEbeN/SAlxYFxAXgRE5hLvV5sOevt09koynfmcfsVIGzwTDMz8C5t90wBMM0kvBjDQqzxZYZzJ6sT4LKQ3XrUpj10+GH946zuPrh4bvnS2MdgFtJRqGndkhg5MVEnqf/ngBIeoht2BBa1wfNc8JS2JGTIh00YMgnc0WMGgrRtGnat8hIJbIi3ukwqoU3fSUTCYIZNLvaPBCgY7xMbgEqmfQVL5m8kUd1UVdQ32ZinuypyOyBMFUNjWEDZZbAxRkcqNopDe6zGY1cUTurXBn87ugytGdE1CjjKbBisY1EpS2LYp3rlptH6f4Rghc4n/O6aUHUCFQmKkag6t0b6nnCx1RE+4+bSeScpRZtNgBYPK0K6t0VZZdS0UXVUkghDOG9ePREVVte0xRMCQLnlYsXU/2jQzXwAqWYRsDMnNRzRoa+Tbho6cEH8avGDQog6xfUFVkiCEGOkwVtNd5/bDpcM6o1u7Zh7nyB2Uxl5JWnof0yLZWch4xPhsgpM1nwXBDELAKDqgU6tkZyUMSucRg9TFhNJgBYNdpylk6JKv0Yx+HVsmOwtCFPjS2fosdTGhiCrJBIneaM+7N43GQcOkQCFAKtun0trGkLqPNSNpsILByYcmH6M5TXKz0CQ3Kyw9nTqiXsU2SuVPJq1nPic7Aw2MBqtKapITaNhaNA6XjcH1GBKao/Tl2WuKcM2oY9E9xYytgjnpOGIQEkuDHTGcPaADppzbD1eeEJoco9aX0DwGEQ1O6JHfHPdOSHjsw5QklT+ZUBC99JMMUhcTS4MVDD4f4TdjupvuC7MxJCA/QmaQyrPl09n2nLpPNTNpsKokO4KxkpKcD8E7GmIPNK1tDA3vdSUVEQwmGOcxpF81EiLhmfE5hRuwdI6umsojsUxEBIMJoVkM8jEKmUMwumqS8yGkPl6u+fwmEa1U/sqIaKWSXkhERzT7ntGcM4yIVhNRKRE9Tgke76u9SJ9Pf1sRD0ImkNY2BqmECcXLpT1/pm4T0aMADmh2b2TmISanPQ3gegCLAMwCMA4ervtspLY+UGNC0RsTdWchecTnJQc92lL4m0lnG4OQWDxXJSm9/ssBvB7huI4AWjLzQg503WcAuNDr/GmpVxy8s32iYROiI5WN2mltY0jdx5qRJKIFPBnAT8y8QZPWjYhWENGXRHSyklYAoFxzTLmSljDq/H4AQLZPRgyCO9LhU8lrkgsAOLt/hyTnxD2pLHAzkZhUSUQ0F4DZVzaFmT9Qtq+AfrSwA0BXZt5DRMMAvE9EA2Bet0y7NkR0PQIqJ3TtGr/VmxS5gCyjjUE+SiEDaNU0B8v+PBZ5TXOTnRUhxYlJMDDzWLv9RJQN4GIAwzTnVAOoVraXEdFGAL0RGCF01pzeGcB2i/tOBzAdAIqKiuI2
Lg6OGDQrRAlCJqEuSpVuSE1MLF6rksYCWMfMQRUREeUTUZay3R1ALwCbmHkHgCoiGqnYJa4G8IHZRb1CtTFkKTYG+RgFp6Sf1j69kD5aYvE6JMZEhBudxwD4KxHVAagHcCMz71X23QTgJQBNEPBGSphHEgDUqYKBxMYgCKmEzClKLJ4KBma+1iTtHQDvWBxfDCBp0dhCIwb5CAV3yBcjZBLil6lBHTEE5zFIdc9Y4q36EVWSt8joPbGIYNBgHDHIxygIqYFUxcQigkGD6pUUFAzJzIyQVsi34jHygBOKCAYN9fXmNgb5JjOXeI0KRZXkLaLWTSwiGDTUsyEkhnyLGU8aRocQBM8RwaAhNzvwOEKqJJEMgpAKiL0vsTTYpT3NmPGrEzBz1Xa0ax4IGSAfY+YT73csn4w3yHNNLCIYNHRr1wy3nN4rLF0EhOAU0Ux5g4SnSSyiSrJBPkVBSA2kLiYWEQw2BJdClG5gxiHvVBCsEcFgg/RSBCE1EE1SYhHB4AD5KDMPr96pfCreIB6CiUUEgw0iEAS3iIbKI6QuJhQRDDZIL0UQUgPppCUWEQx2yMeYsXhlfJZPRsgERDA4QEYOgpBcpAYmFhEMNsjwVRBSA5ngllhEMNggn2LmE693nNc0EEZFVv/zBnmqiSUmwUBElxHRWiLyE1GRYd9kIiolovVEdLYmfRgRrVb2PU5KV4CIGhHRm0r6YiIqjCVv8UB6KYJTXrx2OP524UAc07JxsrOSkUhVTCyxjhjWALgYwAJtIhH1BzARwAAA4wA8RURZyu6nAVwPoJfyN05Jvw7APmbuCeAfAB6KMW/xQz5KIQIdWjXGL0Yem+xsCEJciEkwMHMJM6832TUBwBvMXM3MmwGUAhhBRB0BtGTmhczMAGYAuFBzzsvK9tsAzqAkd9mDNxfndEFIKuIAkli8sjEUANim+V2upBUo28Z03TnMXAfgAIC2ZhcnouuJqJiIiisqKuKcde19PLu0IAgukLqYWCKG3SaiuQA6mOyawswfWJ1mksY26XbnhCcyTwcwHQCKioo8689LLyVz6dy6CQBgcJe85GZEEFKQiIKBmcdGcd1yAF00vzsD2K6kdzZJ155TTkTZAFoB2BvFveOPyIeMY2BBK8y5bQx6tW+e7KwIQsrhlSrpQwATFU+jbggYmZcw8w4AVUQ0UrEfXA3gA8051yjblwKYr9ghkoYMXzObPh1awCfupWmB1MXEEtMKbkR0EYAnAOQDmElEK5n5bGZeS0RvAfgeQB2Am5m5XjntJgAvAWgC4BPlDwCeB/AKEZUiMFKYGEveBEHIHEStm1hiEgzM/B6A9yz2TQMwzSS9GMBAk/SjAC6LJT+CIGQmMmJILDLz2QHyTQpCcpE6mFhEMAiCIAg6RDA4IFsMlIKQVCQ8TWKJycaQ6TTOycLvz+iFcweZTeMQBCFRiFhILCIYIvCHM3snOwuC0OCRAUNiEVWSIAgpj6iSEosIBkEQBEGHCAZBEARBhwgGQRAEQYcIBkEQBEGHCAZBEARBhwgGQRAEQYcIBkEQBEGHCAZBEARBhwgGQRAEQYcIBkEQBEGHCAZBEARBhwgGQRAEQUdMgoGILiOitUTkJ6IiTfqZRLSMiFYr/0/X7PuCiNYT0Urlr72S3oiI3iSiUiJaTESFseRNEARBiI5Yw26vAXAxgP8Y0ncDOJ+ZtxPRQABzABRo9v9cWftZy3UA9jFzTyKaCOAhAD+LMX+CIAiCS2IaMTBzCTOvN0lfwczblZ9rATQmokYRLjcBwMvK9tsAziCJtSsIgpBwEmFjuATACmau1qS9qKiRpmoa/wIA2wCAmesAHADQ1uyCRHQ9ERUTUXFFRYWXeRcEQWhwRBQMRDSXiNaY/E1wcO4ABFRCN2iSf87MgwCcrPz9Qj3c5BJsdl1mns7MRcxclJ+fHykbgiAIggsi2hiYeWw0FyaizgDeA3A1M2/UXO9H5X8VEb0GYASAGQDKAXQBUE5E2QBa
Adgbzb0FQRCE6PFElUREeQBmApjMzN9o0rOJqJ2ynQPgPAQM2ADwIYBrlO1LAcxnZtMRgyAIguAdsbqrXkRE5QBGAZhJRHOUXbcA6AlgqsEttRGAOUS0CsBKAD8CeFY553kAbYmoFMAfAEyKJW+CIAhCdMTkrsrM7yGgLjKm3wfgPovThllc6yiAy2LJjyAIghA7MvNZEARB0CGCQRAEQdAhgkEQBEHQIYJBEARB0CGCQRAEQdAhgkEQBEHQIYJBEARB0CGCQRAEQdAhgkEQBEHQIYJBEARB0CGCQRAEQdAhgkEQBEHQIYJBEARB0CGCQRAEQdAhgkEQBEHQIYJBEARB0CGCQRAEQdAhgkEQBEHQEeuaz5cR0Voi8hNRkSa9kIiOaNZ7fkazbxgRrSaiUiJ6nIhISW9ERG8q6YuJqDCWvAmCIAjREeuIYQ2AiwEsMNm3kZmHKH83atKfBnA9gF7K3zgl/ToA+5i5J4B/AHgoxrwJgiAIURCTYGDmEmZe7/R4IuoIoCUzL2RmBjADwIXK7gkAXla23wZwhjqaEARBEBKHlzaGbkS0goi+JKKTlbQCAOWaY8qVNHXfNgBg5joABwC0NbswEV1PRMVEVFxRUeFN7gVBEBoo2ZEOIKK5ADqY7JrCzB9YnLYDQFdm3kNEwwC8T0QDAJiNAFi9lc0+fSLzdADTAaCoqMj0GEEQBCE6IgoGZh7r9qLMXA2gWtleRkQbAfRGYITQWXNoZwDble1yAF0AlBNRNoBWAPa6vbcgCIIQG56okogon4iylO3uCBiZNzHzDgBVRDRSsR9cDUAddXwI4Bpl+1IA8xU7hCAIgpBAYnVXvYiIygGMAjCTiOYou8YAWEVE3yFgSL6RmdXe/00AngNQCmAjgE+U9OcBtCWiUgB/ADAplrwJgiAI0UHp3ikvKiri4uLiZGdDEASPeXd5OTq2aoJRPUx9UgSXENEyZi4y2xfRxiAIgpAKXHx858gHCXFBQmIIgiAIOkQwCIIgCDpEMAiCIAg6RDAIgiAIOkQwCIIgCDpEMAiCIAg6RDAIgiAIOkQwCIIgCDrSfuYzEVUA2BLl6e0A7I5jdlKNTC5fJpcNyOzyZXLZgPQp37HMnG+2I+0FQywQUbHVlPBMIJPLl8llAzK7fJlcNiAzyieqJEEQBEGHCAZBEARBR0MXDNOTnQGPyeTyZXLZgMwuXyaXDciA8jVoG4MgCIIQTkMfMQiCIAgGRDAIgiAIOhqsYCCicUS0nohKiSjtlhEloi5E9DkRlRDRWiK6VUlvQ0SfEdEG5X9rzTmTlfKuJ6Kzk5d7ZxBRFhGtIKKPld+ZVLY8InqbiNYp73BUhpXvduW7XENErxNR43QtHxG9QES7iGiNJs11WYhoGBGtVvY9rqx7n5owc4P7A5CFwHrT3QHkAvgOQP9k58tlGToCOF7ZbgHgBwD9ATwMYJKSPgnAQ8p2f6WcjQB0U8qflexyRCjjHwC8BuBj5Xcmle1lAL9WtnMB5GVK+QAUANgMoIny+y0A16Zr+RBYw/54AGs0aa7LAmAJgFEACIG17s9Jdtms/hrqiGEEgFJm3sTMNQDeADAhyXlyBTPvYOblynYVgBIEKuQEBBodKP8vVLYnAHiDmauZeTOAUgSeQ0pCRJ0BjAfwnCY5U8rWEoHG5nkAYOYaZt6PDCmfQjaAJkSUDaApgO1I0/Ix8wIAew3JrspCRB0BtGTmhRyQEjM056QcDVUwFADYpvldrqSlJURUCGAogMUAjmHmHUBAeABorxyWbmX+J4D/A+DXpGVK2boDqADwoqIqe46ImiFDysfMPwJ4BMBWADsAHGDmT5Eh5VNwW5YCZduYnpI0VMFgpttLS79dImoO4B0AtzFzpd2hJmkpWWYiOg/ALmZe5vQUk7SULJtCNgKqiaeZeSiAQwioI6xIq/Ip+vYJCKhSOgFoRkRX2Z1ikpay5YuAVVnSqowNVTCUA+ii+d0ZgaFuWkFEOQgIhVeZ
+V0l+Sdl2Arl/y4lPZ3KfCKAC4ioDAE13+lE9F9kRtmAQH7LmXmx8vttBARFppRvLIDNzFzBzLUA3gUwGplTPsB9WcqVbWN6StJQBcNSAL2IqBsR5QKYCODDJOfJFYpHw/MASpj5Mc2uDwFco2xfA+ADTfpEImpERN0A9ELAGJZyMPNkZu7MzIUIvJv5zHwVMqBsAMDMOwFsI6I+StIZAL5HhpQPARXSSCJqqnynZyBgA8uU8gEuy6Kom6qIaKTyTK7WnJN6JNv6naw/AOci4MmzEcCUZOcnivyfhMBQdBWAlcrfuQDaApgHYIPyv43mnClKedcjhT0iDOU8FSGvpIwpG4AhAIqV9/c+gNYZVr57AawDsAbAKwh46aRl+QC8joCtpBaBnv910ZQFQJHyPDYCeBJK5IlU/JOQGIIgCIKOhqpKEgRBECwQwSAIgiDoEMEgCIIg6BDBIAiCIOgQwSAIgiDoEMEgCIIg6BDBIAiCIOj4f0qHcimOq2lIAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    \"\"\"Run a single episode of `agent` acting in `env`.\n",
    "\n",
    "    The agent is stepped before the termination check, so on normal\n",
    "    termination it receives one last call carrying the final observation,\n",
    "    reward and done=True. When the loop stops because `max_episode_steps`\n",
    "    was reached, no such final call is made.\n",
    "\n",
    "    Args:\n",
    "        env: environment providing reset(), step(action) -> 4-tuple, and\n",
    "            render().\n",
    "        agent: object providing reset(mode=...), step(observation, reward,\n",
    "            done) -> action, and close().\n",
    "        max_episode_steps: if truthy, stop once this many environment steps\n",
    "            have been taken.\n",
    "        mode: forwarded unchanged to agent.reset().\n",
    "        render: if truthy, call env.render() on every loop iteration.\n",
    "\n",
    "    Returns:\n",
    "        (total_reward, step_count): the sum of the rewards returned by\n",
    "        env.step() and the number of env.step() calls made.\n",
    "    \"\"\"\n",
    "    state = env.reset()\n",
    "    last_reward, finished = 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    total_reward = 0.\n",
    "    step_count = 0\n",
    "    while True:\n",
    "        # Step the agent first: on the terminal iteration this hands it the\n",
    "        # last (observation, reward, done=True) triple before we leave.\n",
    "        chosen_action = agent.step(state, last_reward, finished)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if finished:\n",
    "            break\n",
    "        state, last_reward, finished, _ = env.step(chosen_action)\n",
    "        total_reward += last_reward\n",
    "        step_count += 1\n",
    "        # Optional step cap; a falsy value (None or 0) disables it.\n",
    "        if max_episode_steps and step_count >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return total_reward, step_count\n",
    "\n",
    "\n",
    "# Training phase: keep playing episodes in 'train' mode until the mean\n",
    "# reward over the most recent (up to) 10 episodes exceeds 250.\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "for episode in itertools.count():\n",
    "    # The unwrapped env is used together with an explicit step cap read from\n",
    "    # the wrapper; presumably so that reaching the cap is not reported to the\n",
    "    # agent as done -- TODO confirm.\n",
    "    episode_reward, elapsed_steps = play_episode(\n",
    "            env.unwrapped, agent, mode='train',\n",
    "            max_episode_steps=env._max_episode_steps)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug(\n",
    "            'train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    if np.mean(episode_rewards[-10:]) > 250:\n",
    "        break  # stopping criterion met; end training\n",
    "# Learning curve: one point per training episode.\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "# Evaluation phase: 100 episodes using play_episode's defaults (mode=None,\n",
    "# no step cap passed, no rendering), then log the mean and std of rewards.\n",
    "logging.info('==== test ====')\n",
    "# Rebinding the name discards the training rewards collected above.\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug(\n",
    "            'test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "logging.info(\n",
    "        'average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
